Commit 96a26c59 authored by Baokun Li
Browse files

fscache: fix op leak due to abort init after parent ready

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB5UKT



--------------------------------

The following race may cause cachefiles to hang on exit because the
parent's n_ops count is leaked:

        child       |   close devfd     |         parent
-------------------------------------------------------------------
fscache_object_init
  oob_table = fscache_osm_init_oob
                                        fscache_object_available
                                        fscache_jumpstart_dependents
                                        fscache_enqueue_dependents
fscache_parent_ready
  parent->n_ops++
  parent->n_obj_ops++
                    cachefiles_daemon_release
                     cachefiles_put_unbind_pincount
                      cachefiles_daemon_unbind
                       fscache_withdraw_cache
                        fscache_withdraw_all_objects
                         fscache_raise_event(FSCACHE_OBJECT_EV_KILL)
fscache_object_sm_dispatcher
  // get KILL oob_event_mask
fscache_abort_initialisation
fscache_kill_object
fscache_drop_object
fscache_object_dead
                                        fscache_kill_object
                                          // n_ops != 0 will not drop
                                          transit_to(WAIT_FOR_CLEARANCE)
                                          // No one will ever touch it
                        wait_event(&cache->object_count == 0)
                        // object_count never reaches 0, resulting in a hang

Therefore, take the parent's n_ops/n_obj_ops only after the oob_table has
been updated to fscache_osm_lookup_oob in fscache_look_up_object(). This
ensures that fscache_done_parent_op() is always called to release the
corresponding counts. Since the parent's n_children has already been
incremented, the parent cannot be freed while the child object is being
looked up, so moving the n_ops/n_obj_ops increments has no adverse effect.

Fixes: caaef690 ("FS-Cache: Fix object state machine to have separate work and wait states")
Signed-off-by: Baokun Li <libaokun1@huawei.com>
parent dbcacfa4
Loading
Loading
Loading
Loading
+6 −8
Original line number Diff line number Diff line
@@ -427,17 +427,9 @@ static const struct fscache_state *fscache_initialise_object(struct fscache_obje
static const struct fscache_state *fscache_parent_ready(struct fscache_object *object,
							int event)
{
	struct fscache_object *parent = object->parent;

	_enter("{OBJ%x},%d", object->debug_id, event);

	ASSERT(parent != NULL);

	spin_lock(&parent->lock);
	parent->n_ops++;
	parent->n_obj_ops++;
	object->lookup_jif = jiffies;
	spin_unlock(&parent->lock);

	_leave("");
	return transit_to(LOOK_UP_OBJECT);
@@ -460,6 +452,12 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object
	object->oob_table = fscache_osm_lookup_oob;

	ASSERT(parent != NULL);

	spin_lock(&parent->lock);
	parent->n_ops++;
	parent->n_obj_ops++;
	spin_unlock(&parent->lock);

	ASSERTCMP(parent->n_ops, >, 0);
	ASSERTCMP(parent->n_obj_ops, >, 0);