Commit 8e327b9c authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba
Browse files

btrfs: dump all space infos if we abort transaction due to ENOSPC



We have hit some transaction abort due to -ENOSPC internally.

Normally we should always reserve enough space for metadata for every
transaction, thus hitting -ENOSPC should really indicate some cases we
didn't expect.

But unfortunately current error reporting will only give a kernel
warning and stack trace, not really helpful to debug what's causing the
problem.

And mount option debug_enospc can only help when user can reproduce the
problem, but under most cases, such transaction abort by -ENOSPC is
really hard to reproduce.

So this patch will dump all space infos (data, metadata, system) when we
abort the first transaction with -ENOSPC.

This should at least provide some clue to us.

The example of a dump would look like this:

  BTRFS: Transaction aborted (error -28)
  WARNING: CPU: 8 PID: 3366 at fs/btrfs/transaction.c:2137 btrfs_commit_transaction+0xf81/0xfb0 [btrfs]
  <call trace skipped>
  ---[ end trace 0000000000000000 ]---
  BTRFS info (device dm-1: state A): dumping space info:
  BTRFS info (device dm-1: state A): space_info DATA has 6791168 free, is not full
  BTRFS info (device dm-1: state A): space_info total=8388608, used=1597440, pinned=0, reserved=0, may_use=0, readonly=0 zone_unusable=0
  BTRFS info (device dm-1: state A): space_info METADATA has 257114112 free, is not full
  BTRFS info (device dm-1: state A): space_info total=268435456, used=131072, pinned=180224, reserved=65536, may_use=10878976, readonly=65536 zone_unusable=0
  BTRFS info (device dm-1: state A): space_info SYSTEM has 8372224 free, is not full
  BTRFS info (device dm-1: state A): space_info total=8388608, used=16384, pinned=0, reserved=0, may_use=0, readonly=0 zone_unusable=0
  BTRFS info (device dm-1: state A): global_block_rsv: size 3670016 reserved 3670016
  BTRFS info (device dm-1: state A): trans_block_rsv: size 0 reserved 0
  BTRFS info (device dm-1: state A): chunk_block_rsv: size 0 reserved 0
  BTRFS info (device dm-1: state A): delayed_block_rsv: size 4063232 reserved 4063232
  BTRFS info (device dm-1: state A): delayed_refs_rsv: size 3145728 reserved 3145728
  BTRFS: error (device dm-1: state A) in btrfs_commit_transaction:2137: errno=-28 No space left
  BTRFS info (device dm-1: state EA): forced readonly

Reviewed-by: default avatarJohannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 25a860c4
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -3828,7 +3828,7 @@ const char * __attribute_const__ btrfs_decode_error(int errno);
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
			       const char *function,
			       unsigned int line, int errno);
			       unsigned int line, int errno, bool first_hit);

/*
 * Call btrfs_abort_transaction as early as possible when an error condition is
@@ -3836,9 +3836,11 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
 */
#define btrfs_abort_transaction(trans, errno)		\
do {								\
	bool first = false;					\
	/* Report first abort since mount */			\
	if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,	\
			&((trans)->fs_info->fs_state))) {	\
		first = true;					\
		if ((errno) != -EIO && (errno) != -EROFS) {		\
			WARN(1, KERN_DEBUG				\
			"BTRFS: Transaction aborted (error %d)\n",	\
@@ -3850,7 +3852,7 @@ do { \
		}						\
	}							\
	__btrfs_abort_transaction((trans), __func__,		\
				  __LINE__, (errno));		\
				  __LINE__, (errno), first);	\
} while (0)

#ifdef CONFIG_PRINTK_INDEX
+24 −7
Original line number Diff line number Diff line
@@ -492,6 +492,15 @@ static const char *space_info_flag_to_str(const struct btrfs_space_info *space_i
	}
}

static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	DUMP_BLOCK_RSV(fs_info, global_block_rsv);
	DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
	DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
}

static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *info)
{
@@ -508,13 +517,6 @@ static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
		info->total_bytes, info->bytes_used, info->bytes_pinned,
		info->bytes_reserved, info->bytes_may_use,
		info->bytes_readonly, info->bytes_zone_unusable);

	DUMP_BLOCK_RSV(fs_info, global_block_rsv);
	DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
	DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);

}

void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
@@ -526,6 +528,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,

	spin_lock(&info->lock);
	__btrfs_dump_space_info(fs_info, info);
	dump_global_block_rsv(fs_info);
	spin_unlock(&info->lock);

	if (!dump_block_groups)
@@ -1770,3 +1773,17 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
	}
	return ret;
}

/* Dump all the space infos when we abort a transaction due to ENOSPC. */
__cold void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space_info;

	btrfs_info(fs_info, "dumping space info:");
	list_for_each_entry(space_info, &fs_info->space_info, list) {
		spin_lock(&space_info->lock);
		__btrfs_dump_space_info(fs_info, space_info);
		spin_unlock(&space_info->lock);
	}
	dump_global_block_rsv(fs_info);
}
+2 −0
Original line number Diff line number Diff line
@@ -157,4 +157,6 @@ static inline void btrfs_space_info_free_bytes_may_use(
}
int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
			     enum btrfs_reserve_flush_enum flush);
void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);

#endif /* BTRFS_SPACE_INFO_H */
+3 −1
Original line number Diff line number Diff line
@@ -346,12 +346,14 @@ void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
			       const char *function,
			       unsigned int line, int errno)
			       unsigned int line, int errno, bool first_hit)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	WRITE_ONCE(trans->aborted, errno);
	WRITE_ONCE(trans->transaction->aborted, errno);
	if (first_hit && errno == -ENOSPC)
		btrfs_dump_space_info_for_trans_abort(fs_info);
	/* Wake up anybody who may be waiting on this transaction */
	wake_up(&fs_info->transaction_wait);
	wake_up(&fs_info->transaction_blocked_wait);