Commit 644f3a29 authored by Kevin Wolf
Browse files

job: Avoid deadlocks in job_completed_txn_abort()



Among other things, job_finalize_single() calls the .prepare/.commit/.abort
callbacks of the individual job driver. These callbacks were recently
changed for all block jobs so that they now involve code calling
AIO_WAIT_WHILE(). Such code must be called with the AioContext lock of
the respective job held, but without holding any other AioContext lock.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
parent ecc1a5c7
Loading
Loading
Loading
Loading
+11 −5
Original line number Diff line number Diff line
@@ -718,6 +718,7 @@ static void job_cancel_async(Job *job, bool force)

static void job_completed_txn_abort(Job *job)
{
    AioContext *outer_ctx = job->aio_context;
    AioContext *ctx;
    JobTxn *txn = job->txn;
    Job *other_job;
@@ -731,23 +732,26 @@ static void job_completed_txn_abort(Job *job)
    txn->aborting = true;
    job_txn_ref(txn);

    /* We are the first failed job. Cancel other jobs. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        ctx = other_job->aio_context;
        aio_context_acquire(ctx);
    }
    /* We can only hold the single job's AioContext lock while calling
     * job_finalize_single() because the finalization callbacks can involve
     * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */
    aio_context_release(outer_ctx);

    /* Other jobs are effectively cancelled by us, set the status for
     * them; this job, however, may or may not be cancelled, depending
     * on the caller, so leave it. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (other_job != job) {
            ctx = other_job->aio_context;
            aio_context_acquire(ctx);
            job_cancel_async(other_job, false);
            aio_context_release(ctx);
        }
    }
    while (!QLIST_EMPTY(&txn->jobs)) {
        other_job = QLIST_FIRST(&txn->jobs);
        ctx = other_job->aio_context;
        aio_context_acquire(ctx);
        if (!job_is_completed(other_job)) {
            assert(job_is_cancelled(other_job));
            job_finish_sync(other_job, NULL, NULL);
@@ -756,6 +760,8 @@ static void job_completed_txn_abort(Job *job)
        aio_context_release(ctx);
    }

    aio_context_acquire(outer_ctx);

    job_txn_unref(txn);
}