Commit ba868d9d authored by Alexander Aring, committed by David Teigland
Browse files

fs: dlm: reconnect if socket error report occurs



This patch changes the reconnect handling so that a reconnect is
triggered when the socket error report callback is invoked. This also
handles reconnects in the non-blocking connect case, which is currently
missing. If the error ECONNREFUSED is reported, the reconnect is delayed
by one second.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
parent 7443bc96
Loading
Loading
Loading
Loading
+39 −21
Original line number Diff line number Diff line
@@ -79,6 +79,8 @@ struct connection {
#define CF_CLOSING 8
#define CF_SHUTDOWN 9
#define CF_CONNECTED 10
#define CF_RECONNECT 11
#define CF_DELAY_CONNECT 12
	struct list_head writequeue;  /* List of outgoing writequeue_entries */
	spinlock_t writequeue_lock;
	void (*connect_action) (struct connection *);	/* What to do to connect */
@@ -87,6 +89,7 @@ struct connection {
#define MAX_CONNECT_RETRIES 3
	struct hlist_node list;
	struct connection *othercon;
	struct connection *sendcon;
	struct work_struct rwork; /* Receive workqueue */
	struct work_struct swork; /* Send workqueue */
	wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
@@ -585,6 +588,22 @@ static void lowcomms_error_report(struct sock *sk)
				   dlm_config.ci_tcp_port, sk->sk_err,
				   sk->sk_err_soft);
	}

	/* below sendcon only handling */
	if (test_bit(CF_IS_OTHERCON, &con->flags))
		con = con->sendcon;

	switch (sk->sk_err) {
	case ECONNREFUSED:
		set_bit(CF_DELAY_CONNECT, &con->flags);
		break;
	default:
		break;
	}

	if (!test_and_set_bit(CF_RECONNECT, &con->flags))
		queue_work(send_workqueue, &con->swork);

out:
	read_unlock_bh(&sk->sk_callback_lock);
	if (orig_report)
@@ -702,6 +721,8 @@ static void close_connection(struct connection *con, bool and_other,
	con->rx_leftover = 0;
	con->retries = 0;
	clear_bit(CF_CONNECTED, &con->flags);
	clear_bit(CF_DELAY_CONNECT, &con->flags);
	clear_bit(CF_RECONNECT, &con->flags);
	mutex_unlock(&con->sock_mutex);
	clear_bit(CF_CLOSING, &con->flags);
}
@@ -840,10 +861,8 @@ static int receive_from_sock(struct connection *con)

out_close:
	mutex_unlock(&con->sock_mutex);
	if (ret != -EAGAIN) {
		/* Reconnect when there is something to send */
		close_connection(con, false, true, false);
	if (ret == 0) {
		close_connection(con, false, true, false);
		log_print("connection %p got EOF from %d",
			  con, con->nodeid);
		/* handling for tcp shutdown */
@@ -852,7 +871,6 @@ static int receive_from_sock(struct connection *con)
		/* signal to breaking receive worker */
		ret = -1;
	}
	}
	return ret;
}

@@ -940,6 +958,7 @@ static int accept_from_sock(struct listen_connection *con)
			lockdep_set_subclass(&othercon->sock_mutex, 1);
			set_bit(CF_IS_OTHERCON, &othercon->flags);
			newcon->othercon = othercon;
			othercon->sendcon = newcon;
		} else {
			/* close other sock con if we have something new */
			close_connection(othercon, false, true, false);
@@ -1504,7 +1523,7 @@ static void send_to_sock(struct connection *con)
				cond_resched();
				goto out;
			} else if (ret < 0)
				goto send_error;
				goto out;
		}

		/* Don't starve people filling buffers */
@@ -1521,14 +1540,6 @@ static void send_to_sock(struct connection *con)
	mutex_unlock(&con->sock_mutex);
	return;

send_error:
	mutex_unlock(&con->sock_mutex);
	close_connection(con, false, false, true);
	/* Requeue the send work. When the work daemon runs again, it will try
	   a new connection, then call this function again. */
	queue_work(send_workqueue, &con->swork);
	return;

out_connect:
	mutex_unlock(&con->sock_mutex);
	queue_work(send_workqueue, &con->swork);
@@ -1605,8 +1616,15 @@ static void process_send_sockets(struct work_struct *work)
	WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));

	clear_bit(CF_WRITE_PENDING, &con->flags);
	if (con->sock == NULL) /* not mutex protected so check it inside too */

	if (test_and_clear_bit(CF_RECONNECT, &con->flags))
		close_connection(con, false, false, true);

	if (con->sock == NULL) { /* not mutex protected so check it inside too */
		if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
			msleep(1000);
		con->connect_action(con);
	}
	if (!list_empty(&con->writequeue))
		send_to_sock(con);
}