Commit 15b593ba authored by Linus Torvalds

Merge tag 'ext4_for_linus-6.5-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Bug and regression fixes for 6.5-rc3 for ext4's mballoc and jbd2's
  checkpoint code"

* tag 'ext4_for_linus-6.5-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix rbtree traversal bug in ext4_mb_use_preallocated
  ext4: fix off by one issue in ext4_mb_choose_next_group_best_avail()
  ext4: correct inline offset when handling xattrs in inode body
  jbd2: remove __journal_try_to_free_buffer()
  jbd2: fix a race when checking checkpoint buffer busy
  jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint
  jbd2: remove journal_clean_one_cp_list()
  jbd2: remove t_checkpoint_io_list
  jbd2: recheck chechpointing non-dirty buffer
parents 8266f53b 9d3de7ee
fs/ext4/mballoc.c +140 −32
@@ -1006,14 +1006,11 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
	 * fls() instead since we need to know the actual length while modifying
	 * goal length.
	 */
-	order = fls(ac->ac_g_ex.fe_len);
+	order = fls(ac->ac_g_ex.fe_len) - 1;
	min_order = order - sbi->s_mb_best_avail_max_trim_order;
	if (min_order < 0)
		min_order = 0;

-	if (1 << min_order < ac->ac_o_ex.fe_len)
-		min_order = fls(ac->ac_o_ex.fe_len) + 1;
-
	if (sbi->s_stripe > 0) {
		/*
		 * We are assuming that stripe size is always a multiple of
@@ -1021,9 +1018,16 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
		 */
		num_stripe_clusters = EXT4_NUM_B2C(sbi, sbi->s_stripe);
		if (1 << min_order < num_stripe_clusters)
-			min_order = fls(num_stripe_clusters);
+			/*
+			 * We consider 1 order less because later we round
+			 * up the goal len to num_stripe_clusters
+			 */
+			min_order = fls(num_stripe_clusters) - 1;
	}

+	if (1 << min_order < ac->ac_o_ex.fe_len)
+		min_order = fls(ac->ac_o_ex.fe_len);
+
	for (i = order; i >= min_order; i--) {
		int frag_order;
		/*
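The off-by-one is easiest to see with a power-of-two goal length. Below is a minimal userspace sketch of the arithmetic, using __builtin_clz as a stand-in for the kernel's fls() (1-based index of the most significant set bit); the fe_len value is made up for the example:

#include <stdio.h>

/* Userspace stand-in for the kernel's fls(): 1-based index of the
 * most significant set bit; fls(0) == 0. */
static int fls(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

int main(void)
{
	unsigned int fe_len = 8;	/* hypothetical goal length, in clusters */

	/* Old code: order = fls(8) = 4, so trimming starts at
	 * 1 << 4 = 16 clusters, one order above the 8-cluster goal. */
	printf("old: order %d -> %d clusters\n", fls(fe_len), 1 << fls(fe_len));

	/* Fixed code: order = fls(8) - 1 = 3, and 1 << 3 = 8 matches
	 * the goal length exactly. */
	printf("new: order %d -> %d clusters\n",
	       fls(fe_len) - 1, 1 << (fls(fe_len) - 1));
	return 0;
}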
@@ -4761,8 +4765,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
	int order, i;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_locality_group *lg;
-	struct ext4_prealloc_space *tmp_pa, *cpa = NULL;
-	ext4_lblk_t tmp_pa_start, tmp_pa_end;
+	struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL;
+	loff_t tmp_pa_end;
	struct rb_node *iter;
	ext4_fsblk_t goal_block;

@@ -4770,47 +4774,151 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return false;

-	/* first, try per-file preallocation */
+	/*
+	 * first, try per-file preallocation by searching the inode pa rbtree.
+	 *
+	 * Here, we can't do a direct traversal of the tree because
+	 * ext4_mb_discard_group_preallocation() can paralelly mark the pa
+	 * deleted and that can cause direct traversal to skip some entries.
+	 */
	read_lock(&ei->i_prealloc_lock);

+	if (RB_EMPTY_ROOT(&ei->i_prealloc_node)) {
+		goto try_group_pa;
+	}
+
+	/*
+	 * Step 1: Find a pa with logical start immediately adjacent to the
+	 * original logical start. This could be on the left or right.
+	 *
+	 * (tmp_pa->pa_lstart never changes so we can skip locking for it).
+	 */
	for (iter = ei->i_prealloc_node.rb_node; iter;
	     iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical,
-					    tmp_pa_start, iter)) {
+					    tmp_pa->pa_lstart, iter)) {
		tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
				  pa_node.inode_node);
+	}

-		/* all fields in this condition don't change,
-		 * so we can skip locking for them */
-		tmp_pa_start = tmp_pa->pa_lstart;
-		tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
+	/*
+	 * Step 2: The adjacent pa might be to the right of logical start, find
+	 * the left adjacent pa. After this step we'd have a valid tmp_pa whose
+	 * logical start is towards the left of original request's logical start
+	 */
+	if (tmp_pa->pa_lstart > ac->ac_o_ex.fe_logical) {
+		struct rb_node *tmp;
+		tmp = rb_prev(&tmp_pa->pa_node.inode_node);

-		/* original request start doesn't lie in this PA */
-		if (ac->ac_o_ex.fe_logical < tmp_pa_start ||
-		    ac->ac_o_ex.fe_logical >= tmp_pa_end)
-			continue;
+		if (tmp) {
+			tmp_pa = rb_entry(tmp, struct ext4_prealloc_space,
+					    pa_node.inode_node);
+		} else {
+			/*
+			 * If there is no adjacent pa to the left then finding
+			 * an overlapping pa is not possible hence stop searching
+			 * inode pa tree
+			 */
+			goto try_group_pa;
+		}
+	}

+	BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
+
+	/*
+	 * Step 3: If the left adjacent pa is deleted, keep moving left to find
+	 * the first non deleted adjacent pa. After this step we should have a
+	 * valid tmp_pa which is guaranteed to be non deleted.
+	 */
+	for (iter = &tmp_pa->pa_node.inode_node;; iter = rb_prev(iter)) {
+		if (!iter) {
+			/*
+			 * no non deleted left adjacent pa, so stop searching
+			 * inode pa tree
+			 */
+			goto try_group_pa;
+		}
+		tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
+				  pa_node.inode_node);
+		spin_lock(&tmp_pa->pa_lock);
+		if (tmp_pa->pa_deleted == 0) {
+			/*
+			 * We will keep holding the pa_lock from
+			 * this point on because we don't want group discard
+			 * to delete this pa underneath us. Since group
+			 * discard is anyways an ENOSPC operation it
+			 * should be okay for it to wait a few more cycles.
+			 */
+			break;
+		} else {
+			spin_unlock(&tmp_pa->pa_lock);
+		}
+	}
+
+	BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
+	BUG_ON(tmp_pa->pa_deleted == 1);
+
+	/*
+	 * Step 4: We now have the non deleted left adjacent pa. Only this
+	 * pa can possibly satisfy the request hence check if it overlaps
+	 * original logical start and stop searching if it doesn't.
+	 */
+	tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
+
+	if (ac->ac_o_ex.fe_logical >= tmp_pa_end) {
+		spin_unlock(&tmp_pa->pa_lock);
+		goto try_group_pa;
+	}

	/* non-extent files can't have physical blocks past 2^32 */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
	    (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
	     EXT4_MAX_BLOCK_FILE_PHYS)) {
		/*
-			 * Since PAs don't overlap, we won't find any
-			 * other PA to satisfy this.
+		 * Since PAs don't overlap, we won't find any other PA to
+		 * satisfy this.
		 */
-			break;
+		spin_unlock(&tmp_pa->pa_lock);
+		goto try_group_pa;
	}

		/* found preallocated blocks, use them */
		spin_lock(&tmp_pa->pa_lock);
-		if (tmp_pa->pa_deleted == 0 && tmp_pa->pa_free &&
-		    likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
+	if (tmp_pa->pa_free && likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
		atomic_inc(&tmp_pa->pa_count);
		ext4_mb_use_inode_pa(ac, tmp_pa);
		spin_unlock(&tmp_pa->pa_lock);
		read_unlock(&ei->i_prealloc_lock);
		return true;
+	} else {
+		/*
+		 * We found a valid overlapping pa but couldn't use it because
+		 * it had no free blocks. This should ideally never happen
+		 * because:
+		 *
+		 * 1. When a new inode pa is added to rbtree it must have
+		 *    pa_free > 0 since otherwise we won't actually need
+		 *    preallocation.
+		 *
+		 * 2. An inode pa that is in the rbtree can only have it's
+		 *    pa_free become zero when another thread calls:
+		 *      ext4_mb_new_blocks
+		 *       ext4_mb_use_preallocated
+		 *        ext4_mb_use_inode_pa
+		 *
+		 * 3. Further, after the above calls make pa_free == 0, we will
+		 *    immediately remove it from the rbtree in:
+		 *      ext4_mb_new_blocks
+		 *       ext4_mb_release_context
+		 *        ext4_mb_put_pa
+		 *
+		 * 4. Since the pa_free becoming 0 and pa_free getting removed
+		 * from tree both happen in ext4_mb_new_blocks, which is always
+		 * called with i_data_sem held for data allocations, we can be
+		 * sure that another process will never see a pa in rbtree with
+		 * pa_free == 0.
+		 */
+		WARN_ON_ONCE(tmp_pa->pa_free == 0);
	}
	spin_unlock(&tmp_pa->pa_lock);
-	}
+try_group_pa:
	read_unlock(&ei->i_prealloc_lock);

	/* can we use group allocation? */
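The four steps above can be illustrated outside the kernel. Here is a simplified, runnable sketch in which a sorted array stands in for the ei->i_prealloc_node rbtree, an index decrement stands in for rb_prev(), and all struct and function names are invented; it deliberately omits the pa_lock/i_prealloc_lock handling the real code needs to keep Step 3's answer stable:

#include <stdio.h>

/* Toy stand-in for struct ext4_prealloc_space with only the fields
 * the search needs. */
struct toy_pa {
	unsigned int lstart;	/* pa_lstart: never changes once inserted */
	unsigned int len;	/* pa_len, in the same units as lstart */
	int deleted;		/* pa_deleted */
};

/* Return the index of the only pa that can cover 'logical', else -1. */
static int find_covering_pa(const struct toy_pa *pa, int n, unsigned int logical)
{
	int i = 0, lo = 0, hi = n - 1;

	if (n == 0)		/* the RB_EMPTY_ROOT() check */
		return -1;

	/* Step 1: descend to the entry nearest to 'logical'. */
	while (lo <= hi) {
		i = (lo + hi) / 2;
		if (pa[i].lstart > logical)
			hi = i - 1;
		else if (pa[i].lstart < logical)
			lo = i + 1;
		else
			break;
	}

	/* Step 2: if we stopped to the right of 'logical', move left once. */
	if (pa[i].lstart > logical && --i < 0)
		return -1;

	/* Step 3: skip deleted entries, continuing leftwards. */
	while (pa[i].deleted && --i >= 0)
		;
	if (i < 0)
		return -1;

	/* Step 4: only this left-adjacent pa can possibly overlap. */
	return logical < pa[i].lstart + pa[i].len ? i : -1;
}

int main(void)
{
	const struct toy_pa pa[] = {
		{ 0, 16, 1 },	/* marked deleted, as group discard might */
		{ 16, 16, 0 },
		{ 64, 8, 0 },
	};

	printf("%d\n", find_covering_pa(pa, 3, 20));	/* 1: inside [16,32) */
	printf("%d\n", find_covering_pa(pa, 3, 40));	/* -1: falls in a gap */
	return 0;
}

The point of Steps 2-4 is that, because PAs never overlap, the first non-deleted entry at or to the left of the request is the only one that can contain it, so a single overlap check replaces the full traversal that concurrently-deleted entries could derail.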
fs/ext4/xattr.c +14 −0
@@ -1782,6 +1782,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
		memmove(here, (void *)here + size,
			(void *)last - (void *)here + sizeof(__u32));
		memset(last, 0, size);

+		/*
+		 * Update i_inline_off - moved ibody region might contain
+		 * system.data attribute.  Handling a failure here won't
+		 * cause other complications for setting an xattr.
+		 */
+		if (!is_block && ext4_has_inline_data(inode)) {
+			ret = ext4_find_inline_data_nolock(inode);
+			if (ret) {
+				ext4_warning_inode(inode,
+					"unable to update i_inline_off");
+				goto out;
+			}
+		}
	} else if (s->not_found) {
		/* Insert new name. */
		size_t size = EXT4_XATTR_LEN(name_len);
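The added block exists because removing an earlier entry slides the rest of the in-inode xattr area down with memmove(), leaving the cached i_inline_off pointing at stale bytes; the fix recomputes it by re-running ext4_find_inline_data_nolock(). A standalone illustration of the failure mode (the buffer contents and the arithmetic fix-up are made up for the demo):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* 'body' stands in for the in-inode xattr area and 'inline_off'
	 * for i_inline_off, a cached byte offset into that area. */
	char body[32] = "AAAA....DATA";	/* "AAAA" is the entry being removed */
	size_t inline_off = 8;		/* cached offset of "DATA" */

	/* Remove the 4-byte entry, as ext4_xattr_set_entry() does with
	 * memmove(): everything behind it slides left by 4 bytes. */
	memmove(body, body + 4, strlen(body + 4) + 1);

	printf("stale:   \"%.4s\"\n", body + inline_off);	/* no longer "DATA" */
	inline_off -= 4;	/* what recomputing the offset repairs */
	printf("correct: \"%.4s\"\n", body + inline_off);	/* "DATA" */
	return 0;
}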
fs/jbd2/checkpoint.c +94 −183
@@ -27,7 +27,7 @@
 *
 * Called with j_list_lock held.
 */
-static inline void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

@@ -40,45 +40,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
	}
}

-/*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-static inline void __buffer_unlink(struct journal_head *jh)
-{
-	transaction_t *transaction = jh->b_cp_transaction;
-
-	__buffer_unlink_first(jh);
-	if (transaction->t_checkpoint_io_list == jh) {
-		transaction->t_checkpoint_io_list = jh->b_cpnext;
-		if (transaction->t_checkpoint_io_list == jh)
-			transaction->t_checkpoint_io_list = NULL;
-	}
-}
-
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
-	transaction_t *transaction = jh->b_cp_transaction;
-
-	__buffer_unlink_first(jh);
-
-	if (!transaction->t_checkpoint_io_list) {
-		jh->b_cpnext = jh->b_cpprev = jh;
-	} else {
-		jh->b_cpnext = transaction->t_checkpoint_io_list;
-		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
-		jh->b_cpprev->b_cpnext = jh;
-		jh->b_cpnext->b_cpprev = jh;
-	}
-	transaction->t_checkpoint_io_list = jh;
-}
-
/*
 * Check a checkpoint buffer could be release or not.
 *
@@ -183,6 +144,7 @@ __flush_batch(journal_t *journal, int *batch_count)
		struct buffer_head *bh = journal->j_chkpt_bhs[i];
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
+		journal->j_chkpt_bhs[i] = NULL;
	}
	*batch_count = 0;
}
@@ -242,15 +204,6 @@ int jbd2_log_do_checkpoint(journal_t *journal)
		jh = transaction->t_checkpoint_list;
		bh = jh2bh(jh);

-		if (buffer_locked(bh)) {
-			get_bh(bh);
-			spin_unlock(&journal->j_list_lock);
-			wait_on_buffer(bh);
-			/* the journal_head may have gone by now */
-			BUFFER_TRACE(bh, "brelse");
-			__brelse(bh);
-			goto retry;
-		}
		if (jh->b_transaction != NULL) {
			transaction_t *t = jh->b_transaction;
			tid_t tid = t->t_tid;
@@ -285,30 +238,50 @@ int jbd2_log_do_checkpoint(journal_t *journal)
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
-		if (!buffer_dirty(bh)) {
+		if (!trylock_buffer(bh)) {
+			/*
+			 * The buffer is locked, it may be writing back, or
+			 * flushing out in the last couple of cycles, or
+			 * re-adding into a new transaction, need to check
+			 * it again until it's unlocked.
+			 */
+			get_bh(bh);
+			spin_unlock(&journal->j_list_lock);
+			wait_on_buffer(bh);
+			/* the journal_head may have gone by now */
+			BUFFER_TRACE(bh, "brelse");
+			__brelse(bh);
+			goto retry;
+		} else if (!buffer_dirty(bh)) {
+			unlock_buffer(bh);
			BUFFER_TRACE(bh, "remove from checkpoint");
-			if (__jbd2_journal_remove_checkpoint(jh))
-				/* The transaction was released; we're done */
+			/*
+			 * If the transaction was released or the checkpoint
+			 * list was empty, we're done.
+			 */
+			if (__jbd2_journal_remove_checkpoint(jh) ||
+			    !transaction->t_checkpoint_list)
				goto out;
-			continue;
-		}
+		} else {
+			unlock_buffer(bh);
			/*
-		 * Important: we are about to write the buffer, and
-		 * possibly block, while still holding the journal
-		 * lock.  We cannot afford to let the transaction
-		 * logic start messing around with this buffer before
-		 * we write it to disk, as that would break
-		 * recoverability.
+			 * We are about to write the buffer, it could be
+			 * raced by some other transaction shrink or buffer
+			 * re-log logic once we release the j_list_lock,
+			 * leave it on the checkpoint list and check status
+			 * again to make sure it's clean.
			 */
			BUFFER_TRACE(bh, "queue");
			get_bh(bh);
			J_ASSERT_BH(bh, !buffer_jwrite(bh));
			journal->j_chkpt_bhs[batch_count++] = bh;
-		__buffer_relink_io(jh);
			transaction->t_chp_stats.cs_written++;
+			transaction->t_checkpoint_list = jh->b_cpnext;
+		}

		if ((batch_count == JBD2_NR_BATCH) ||
-		    need_resched() ||
-		    spin_needbreak(&journal->j_list_lock))
+		    need_resched() || spin_needbreak(&journal->j_list_lock) ||
+		    jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
			goto unlock_and_flush;
	}

@@ -322,38 +295,6 @@ int jbd2_log_do_checkpoint(journal_t *journal)
			goto restart;
	}

-	/*
-	 * Now we issued all of the transaction's buffers, let's deal
-	 * with the buffers that are out for I/O.
-	 */
-restart2:
-	/* Did somebody clean up the transaction in the meanwhile? */
-	if (journal->j_checkpoint_transactions != transaction ||
-	    transaction->t_tid != this_tid)
-		goto out;
-
-	while (transaction->t_checkpoint_io_list) {
-		jh = transaction->t_checkpoint_io_list;
-		bh = jh2bh(jh);
-		if (buffer_locked(bh)) {
-			get_bh(bh);
-			spin_unlock(&journal->j_list_lock);
-			wait_on_buffer(bh);
-			/* the journal_head may have gone by now */
-			BUFFER_TRACE(bh, "brelse");
-			__brelse(bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart2;
-		}
-
-		/*
-		 * Now in whatever state the buffer currently is, we
-		 * know that it has been written out and so we can
-		 * drop it from the list
-		 */
-		if (__jbd2_journal_remove_checkpoint(jh))
-			break;
-	}
out:
	spin_unlock(&journal->j_list_lock);
	result = jbd2_cleanup_journal_tail(journal);
@@ -408,50 +349,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal)

/* Checkpoint list management */

-/*
- * journal_clean_one_cp_list
- *
- * Find all the written-back checkpoint buffers in the given list and
- * release them. If 'destroy' is set, clean all buffers unconditionally.
- *
- * Called with j_list_lock held.
- * Returns 1 if we freed the transaction, 0 otherwise.
- */
-static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
-{
-	struct journal_head *last_jh;
-	struct journal_head *next_jh = jh;
-
-	if (!jh)
-		return 0;
-
-	last_jh = jh->b_cpprev;
-	do {
-		jh = next_jh;
-		next_jh = jh->b_cpnext;
-
-		if (!destroy && __cp_buffer_busy(jh))
-			return 0;
-
-		if (__jbd2_journal_remove_checkpoint(jh))
-			return 1;
-		/*
-		 * This function only frees up some memory
-		 * if possible so we dont have an obligation
-		 * to finish processing. Bail out if preemption
-		 * requested:
-		 */
-		if (need_resched())
-			return 0;
-	} while (jh != last_jh);
-
-	return 0;
-}
-
/*
 * journal_shrink_one_cp_list
 *
- * Find 'nr_to_scan' written-back checkpoint buffers in the given list
+ * Find all the written-back checkpoint buffers in the given list
 * and try to release them. If the whole transaction is released, set
 * the 'released' parameter. Return the number of released checkpointed
 * buffers.
@@ -459,15 +360,15 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
 * Called with j_list_lock held.
 */
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
-						unsigned long *nr_to_scan,
-						bool *released)
+						bool destroy, bool *released)
{
	struct journal_head *last_jh;
	struct journal_head *next_jh = jh;
	unsigned long nr_freed = 0;
	int ret;

-	if (!jh || *nr_to_scan == 0)
+	*released = false;
+	if (!jh)
		return 0;

	last_jh = jh->b_cpprev;
@@ -475,12 +376,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
		jh = next_jh;
		next_jh = jh->b_cpnext;

-		(*nr_to_scan)--;
-		if (__cp_buffer_busy(jh))
+		if (destroy) {
+			ret = __jbd2_journal_remove_checkpoint(jh);
+		} else {
+			ret = jbd2_journal_try_remove_checkpoint(jh);
+			if (ret < 0)
				continue;
+		}

		nr_freed++;
-		ret = __jbd2_journal_remove_checkpoint(jh);
		if (ret) {
			*released = true;
			break;
@@ -488,7 +392,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,

		if (need_resched())
			break;
-	} while (jh != last_jh && *nr_to_scan);
+	} while (jh != last_jh);

	return nr_freed;
}
@@ -506,11 +410,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
						  unsigned long *nr_to_scan)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
-	bool released;
+	bool __maybe_unused released;
	tid_t first_tid = 0, last_tid = 0, next_tid = 0;
	tid_t tid = 0;
	unsigned long nr_freed = 0;
-	unsigned long nr_scanned = *nr_to_scan;
+	unsigned long freed;

again:
	spin_lock(&journal->j_list_lock);
@@ -539,19 +443,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
		tid = transaction->t_tid;
-		released = false;

-		nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
-						       nr_to_scan, &released);
-		if (*nr_to_scan == 0)
-			break;
-		if (need_resched() || spin_needbreak(&journal->j_list_lock))
-			break;
-		if (released)
-			continue;
-
-		nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
-						       nr_to_scan, &released);
+		freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+						   false, &released);
+		nr_freed += freed;
+		(*nr_to_scan) -= min(*nr_to_scan, freed);
		if (*nr_to_scan == 0)
			break;
		if (need_resched() || spin_needbreak(&journal->j_list_lock))
@@ -572,9 +468,8 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
	if (*nr_to_scan && next_tid)
		goto again;
out:
-	nr_scanned -= *nr_to_scan;
	trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
-					  nr_freed, nr_scanned, next_tid);
+					  nr_freed, next_tid);

	return nr_freed;
}
@@ -590,7 +485,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
-	int ret;
+	bool released;

	transaction = journal->j_checkpoint_transactions;
	if (!transaction)
@@ -601,8 +496,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
	do {
		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
-		ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
-						destroy);
+		journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+					   destroy, &released);
		/*
		 * This function only frees up some memory if possible so we
		 * dont have an obligation to finish processing. Bail out if
@@ -610,23 +505,12 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
		 */
		if (need_resched())
			return;
-		if (ret)
-			continue;
-		/*
-		 * It is essential that we are as careful as in the case of
-		 * t_checkpoint_list with removing the buffer from the list as
-		 * we can possibly see not yet submitted buffers on io_list
-		 */
-		ret = journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list, destroy);
-		if (need_resched())
-			return;
		/*
		 * Stop scanning if we couldn't free the transaction. This
		 * avoids pointless scanning of transactions which still
		 * weren't checkpointed.
		 */
-		if (!ret)
+		if (!released)
			return;
	} while (transaction != last_transaction);
}
@@ -705,7 +589,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
	jbd2_journal_put_journal_head(jh);

	/* Is this transaction empty? */
-	if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
+	if (transaction->t_checkpoint_list)
		return 0;

	/*
@@ -736,6 +620,34 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
	return 1;
}

+/*
+ * Check the checkpoint buffer and try to remove it from the checkpoint
+ * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if
+ * it frees the transaction, 0 otherwise.
+ *
+ * This function is called with j_list_lock held.
+ */
+int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
+{
+	struct buffer_head *bh = jh2bh(jh);
+
+	if (!trylock_buffer(bh))
+		return -EBUSY;
+	if (buffer_dirty(bh)) {
+		unlock_buffer(bh);
+		return -EBUSY;
+	}
+	unlock_buffer(bh);
+
+	/*
+	 * Buffer is clean and the IO has finished (we held the buffer
+	 * lock) so the checkpoint is done. We can safely remove the
+	 * buffer from this transaction.
+	 */
+	JBUFFER_TRACE(jh, "remove from checkpoint list");
+	return __jbd2_journal_remove_checkpoint(jh);
+}
+
/*
 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 * list so that we know when it is safe to clean the transaction out of
@@ -797,7 +709,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
	J_ASSERT(transaction->t_forget == NULL);
	J_ASSERT(transaction->t_shadow_list == NULL);
	J_ASSERT(transaction->t_checkpoint_list == NULL);
-	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
	J_ASSERT(atomic_read(&transaction->t_updates) == 0);
	J_ASSERT(journal->j_committing_transaction != transaction);
	J_ASSERT(journal->j_running_transaction != transaction);
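The helper added above closes a race: the old checkpoint code trusted buffer_dirty() without holding the buffer lock, so a writer could redirty the buffer between the check and the removal. A userspace analogue of the new check-under-lock pattern, with pthread_mutex_trylock() standing in for trylock_buffer() and all names invented for the sketch:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a buffer_head: 'dirty' may only be trusted while
 * 'lock' is held, mirroring buffer_dirty() vs. the BH lock bit. */
struct toy_buffer {
	pthread_mutex_t lock;
	bool dirty;
};

/* Same shape as the new helper: -EBUSY if the buffer cannot be proven
 * clean right now, 0 if it is safe to drop from the checkpoint list. */
static int try_remove(struct toy_buffer *b)
{
	if (pthread_mutex_trylock(&b->lock))	/* trylock_buffer() */
		return -EBUSY;
	if (b->dirty) {				/* buffer_dirty() */
		pthread_mutex_unlock(&b->lock);
		return -EBUSY;
	}
	/* Clean while we hold the lock: any in-flight write has
	 * finished, so removal is safe. */
	pthread_mutex_unlock(&b->lock);
	return 0;
}

int main(void)
{
	struct toy_buffer b = { PTHREAD_MUTEX_INITIALIZER, true };

	printf("dirty: %d\n", try_remove(&b));	/* -EBUSY */
	b.dirty = false;
	printf("clean: %d\n", try_remove(&b));	/* 0 */
	return 0;
}

As in the kernel helper, a trylock failure is reported as -EBUSY rather than waited out, so callers such as the shrinker can simply skip busy buffers and move on.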
fs/jbd2/commit.c +1 −2
@@ -1141,8 +1141,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
	spin_lock(&journal->j_list_lock);
	commit_transaction->t_state = T_FINISHED;
	/* Check if the transaction can be dropped now that we are finished */
-	if (commit_transaction->t_checkpoint_list == NULL &&
-	    commit_transaction->t_checkpoint_io_list == NULL) {
+	if (commit_transaction->t_checkpoint_list == NULL) {
		__jbd2_journal_drop_transaction(journal, commit_transaction);
		jbd2_journal_free_transaction(commit_transaction);
	}
fs/jbd2/transaction.c +8 −32
@@ -1784,8 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
		 * Otherwise, if the buffer has been written to disk,
		 * it is safe to remove the checkpoint and drop it.
		 */
-		if (!buffer_dirty(bh)) {
-			__jbd2_journal_remove_checkpoint(jh);
+		if (jbd2_journal_try_remove_checkpoint(jh) >= 0) {
			spin_unlock(&journal->j_list_lock);
			goto drop;
		}
@@ -2100,35 +2099,6 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
	__brelse(bh);
}

-/*
- * Called from jbd2_journal_try_to_free_buffers().
- *
- * Called under jh->b_state_lock
- */
-static void
-__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
-{
-	struct journal_head *jh;
-
-	jh = bh2jh(bh);
-
-	if (buffer_locked(bh) || buffer_dirty(bh))
-		goto out;
-
-	if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
-		goto out;
-
-	spin_lock(&journal->j_list_lock);
-	if (jh->b_cp_transaction != NULL) {
-		/* written-back checkpointed metadata buffer */
-		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		__jbd2_journal_remove_checkpoint(jh);
-	}
-	spin_unlock(&journal->j_list_lock);
-out:
-	return;
-}
-
/**
 * jbd2_journal_try_to_free_buffers() - try to free page buffers.
 * @journal: journal for operation
@@ -2186,7 +2156,13 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
			continue;

		spin_lock(&jh->b_state_lock);
-		__journal_try_to_free_buffer(journal, bh);
+		if (!jh->b_transaction && !jh->b_next_transaction) {
+			spin_lock(&journal->j_list_lock);
+			/* Remove written-back checkpointed metadata buffer */
+			if (jh->b_cp_transaction != NULL)
+				jbd2_journal_try_remove_checkpoint(jh);
+			spin_unlock(&journal->j_list_lock);
+		}
		spin_unlock(&jh->b_state_lock);
		jbd2_journal_put_journal_head(jh);
		if (buffer_jbd(bh))