Commit 1135ca9c authored by Dan Magenheimer's avatar Dan Magenheimer Committed by Greg Kroah-Hartman
Browse files

staging: ramster: cluster/messaging foundation



Copy cluster subdirectory from ocfs2.  These files implement
the basic cluster discovery, mapping, heartbeat / keepalive, and
messaging ("o2net") that ramster requires for internode communication.
Note: there are NO ramster-specific changes yet; this commit
does NOT pass checkpatch since the copied source files do not.

(Why copy?  This particular part of ocfs2 has never been broken out
for non-ocfs2 use before, some (small) changes are required for ramster
to use that code, and ramster is currently incompatible with real
ocfs2 anyway (requires !CONFIG_OCFS2_FS).  Before ramster can be promoted
out of staging, we will need to work with the ocfs2 maintainers to
see if the code interdependencies can be merged, but for now, for
staging, this seemed to be an expedient way to make use of the ocfs2
core cluster code while still incorporating necessary changes for ramster.)

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 32de21f7
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
# Build the cluster stack as ocfs2_nodemanager.o whenever CONFIG_OCFS2_FS
# is enabled; the object list below is what gets linked into it.
obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o

ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
	quorum.o tcp.o netdebug.o ver.o
+2678 −0

File added.

Preview size limit exceeded, changes collapsed.

+89 −0
Original line number Diff line number Diff line
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * heartbeat.h
 *
 * Function prototypes
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 */

#ifndef O2CLUSTER_HEARTBEAT_H
#define O2CLUSTER_HEARTBEAT_H

#include "ocfs2_heartbeat.h"

/* base interval, in ms, that O2HB_MAX_WRITE_TIMEOUT_MS is a multiple of */
#define O2HB_REGION_TIMEOUT_MS		2000

#define O2HB_MAX_REGION_NAME_LEN	32

/* number of changes to be seen as live */
#define O2HB_LIVE_THRESHOLD	   2
/* number of equal samples to be seen as dead */
extern unsigned int o2hb_dead_threshold;
#define O2HB_DEFAULT_DEAD_THRESHOLD	   31
/* Otherwise MAX_WRITE_TIMEOUT will be zero... */
#define O2HB_MIN_DEAD_THRESHOLD	  2
/*
 * Not a compile-time constant: this reads the o2hb_dead_threshold variable
 * every time it is evaluated.
 */
#define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1))

/* NOTE(review): presumably the value stored in o2hb_callback_func.hc_magic -- confirm in heartbeat.c */
#define O2HB_CB_MAGIC		0x51d1e4ec

/* callback stuff */
/*
 * Kinds of heartbeat callback.  O2HB_NUM_CB is not a callback kind itself;
 * as the last enumerator it equals the number of kinds declared before it.
 */
enum o2hb_callback_type {
	O2HB_NODE_DOWN_CB = 0,
	O2HB_NODE_UP_CB,
	O2HB_NUM_CB
};

/* Only pointers to o2nm_node are used here, so a forward declaration suffices. */
struct o2nm_node;
/*
 * Signature of a heartbeat callback: a node pointer, an int and an opaque
 * pointer.  The parameters are unnamed, so the meaning of the int cannot be
 * determined from this header alone.
 */
typedef void (o2hb_cb_func)(struct o2nm_node *, int, void *);

/*
 * One callback registration record.
 *
 * NOTE(review): hc_func, hc_data, hc_priority and hc_type mirror the
 * parameters of o2hb_setup_callback(), which presumably fills them in;
 * hc_magic presumably holds O2HB_CB_MAGIC -- confirm against heartbeat.c.
 */
struct o2hb_callback_func {
	u32			hc_magic;
	struct list_head	hc_item;	/* list linkage */
	o2hb_cb_func		*hc_func;
	void			*hc_data;
	int			hc_priority;
	enum o2hb_callback_type hc_type;
};

/*
 * Heartbeat entry points.  Only the declarations are visible in this
 * header; the definitions live elsewhere in the cluster code.
 */

/* allocation / release of the heartbeat config group */
struct config_group *o2hb_alloc_hb_set(void);
void o2hb_free_hb_set(struct config_group *group);

/* callback set-up and (un)registration against a region, named by UUID */
void o2hb_setup_callback(struct o2hb_callback_func *hc,
			 enum o2hb_callback_type type,
			 o2hb_cb_func *func,
			 void *data,
			 int priority);
int o2hb_register_callback(const char *region_uuid,
			   struct o2hb_callback_func *hc);
void o2hb_unregister_callback(const char *region_uuid,
			      struct o2hb_callback_func *hc);
/* @bytes is presumably the size of @map in bytes -- confirm in heartbeat.c */
void o2hb_fill_node_map(unsigned long *map,
			unsigned bytes);
void o2hb_exit(void);
int o2hb_init(void);
int o2hb_check_node_heartbeating(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
int o2hb_check_local_node_heartbeating(void);
void o2hb_stop_all_regions(void);
int o2hb_get_all_regions(char *region_uuids, u8 numregions);
int o2hb_global_heartbeat_active(void);

#endif /* O2CLUSTER_HEARTBEAT_H */
+155 −0
Original line number Diff line number Diff line
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * Copyright (C) 2004, 2005 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <asm/uaccess.h>

#include "masklog.h"

/*
 * Global mask state consulted by the mlog() macro: a message is emitted
 * when one of its bits is set in mlog_and_bits and none is set in
 * mlog_not_bits.  The "and" set starts out as MLOG_INITIAL_AND_MASK and
 * the "not" set starts out empty.
 */
struct mlog_bits mlog_and_bits = MLOG_BITS_RHS(MLOG_INITIAL_AND_MASK);
EXPORT_SYMBOL_GPL(mlog_and_bits);
struct mlog_bits mlog_not_bits = MLOG_BITS_RHS(0);
EXPORT_SYMBOL_GPL(mlog_not_bits);

/*
 * Write the textual state of @mask into @buf, followed by a newline:
 * "allow" if the bit is in mlog_and_bits, otherwise "deny" if it is in
 * mlog_not_bits, otherwise "off".  Returns whatever snprintf() returns
 * for a PAGE_SIZE-bounded buffer.
 */
static ssize_t mlog_mask_show(u64 mask, char *buf)
{
	const char *label = "off";

	/* the "allow" set is checked first, so it wins over "deny" */
	if (__mlog_test_u64(mask, mlog_and_bits))
		label = "allow";
	else if (__mlog_test_u64(mask, mlog_not_bits))
		label = "deny";

	return snprintf(buf, PAGE_SIZE, "%s\n", label);
}

/*
 * Apply the state named at the start of @buf to @mask.  The keyword is
 * matched case-insensitively and only as a prefix, so trailing bytes
 * such as a newline are ignored:
 *
 *   "allow" - set in mlog_and_bits, clear in mlog_not_bits
 *   "deny"  - set in mlog_not_bits, clear in mlog_and_bits
 *   "off"   - clear in both
 *
 * Returns @count on success or -EINVAL for any other input.
 */
static ssize_t mlog_mask_store(u64 mask, const char *buf, size_t count)
{
	if (strnicmp(buf, "allow", 5) == 0) {
		__mlog_set_u64(mask, mlog_and_bits);
		__mlog_clear_u64(mask, mlog_not_bits);
		return count;
	}

	if (strnicmp(buf, "deny", 4) == 0) {
		__mlog_set_u64(mask, mlog_not_bits);
		__mlog_clear_u64(mask, mlog_and_bits);
		return count;
	}

	if (strnicmp(buf, "off", 3) == 0) {
		__mlog_clear_u64(mask, mlog_not_bits);
		__mlog_clear_u64(mask, mlog_and_bits);
		return count;
	}

	return -EINVAL;
}

/* A sysfs attribute paired with the ML_* bit it controls. */
struct mlog_attribute {
	struct attribute attr;
	u64 mask;
};

/* Recover the enclosing mlog_attribute from a pointer to its attr member. */
#define to_mlog_attr(_attr) container_of(_attr, struct mlog_attribute, attr)

/*
 * Initializer for one mlog_attrs[] entry: the file is named after _name,
 * is readable by everyone and writable by its owner, and controls the
 * ML_<_name> bit.
 */
#define define_mask(_name) {			\
	.attr = {				\
		.name = #_name,			\
		.mode = S_IRUGO | S_IWUSR,	\
	},					\
	.mask = ML_##_name,			\
}

/*
 * One entry per ML_* flag exposed as a file.  The array has MLOG_MAX_BITS
 * slots; the unused tail is zero-initialised, and mlog_sys_init() relies on
 * attr.mode == 0 to find the end of the populated entries.
 */
static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
	define_mask(TCP),
	define_mask(MSG),
	define_mask(SOCKET),
	define_mask(HEARTBEAT),
	define_mask(HB_BIO),
	define_mask(DLMFS),
	define_mask(DLM),
	define_mask(DLM_DOMAIN),
	define_mask(DLM_THREAD),
	define_mask(DLM_MASTER),
	define_mask(DLM_RECOVERY),
	define_mask(DLM_GLUE),
	define_mask(VOTE),
	define_mask(CONN),
	define_mask(QUORUM),
	define_mask(BASTS),
	define_mask(CLUSTER),
	define_mask(ERROR),
	define_mask(NOTICE),
	define_mask(KTHREAD),
};

/* NULL-terminated pointer table handed to the ktype; filled by mlog_sys_init(). */
static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };

static ssize_t mlog_show(struct kobject *obj, struct attribute *attr,
			 char *buf)
{
	struct mlog_attribute *mlog_attr = to_mlog_attr(attr);

	return mlog_mask_show(mlog_attr->mask, buf);
}

/*
 * sysfs ->store handler: look up the mask bit that @attr stands for and
 * hand the written text to mlog_mask_store().  @obj is unused.
 */
static ssize_t mlog_store(struct kobject *obj, struct attribute *attr,
			  const char *buf, size_t count)
{
	return mlog_mask_store(to_mlog_attr(attr)->mask, buf, count);
}

/* Route reads and writes of every logmask file to mlog_show()/mlog_store(). */
static const struct sysfs_ops mlog_attr_ops = {
	.show  = mlog_show,
	.store = mlog_store,
};

/* ktype tying the attribute pointer table to the show/store handlers. */
static struct kobj_type mlog_ktype = {
	.default_attrs = mlog_attr_ptrs,
	.sysfs_ops     = &mlog_attr_ops,
};

/* The kset registered by mlog_sys_init(); its name and parent are set there. */
static struct kset mlog_kset = {
	.kobj   = {.ktype = &mlog_ktype},
};

int mlog_sys_init(struct kset *o2cb_kset)
{
	int i = 0;

	while (mlog_attrs[i].attr.mode) {
		mlog_attr_ptrs[i] = &mlog_attrs[i].attr;
		i++;
	}
	mlog_attr_ptrs[i] = NULL;

	kobject_set_name(&mlog_kset.kobj, "logmask");
	mlog_kset.kobj.kset = o2cb_kset;
	return kset_register(&mlog_kset);
}

/* Undo mlog_sys_init(): unregister the "logmask" kset. */
void mlog_sys_shutdown(void)
{
	kset_unregister(&mlog_kset);
}
+219 −0
Original line number Diff line number Diff line
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * Copyright (C) 2005 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#ifndef O2CLUSTER_MASKLOG_H
#define O2CLUSTER_MASKLOG_H

/*
 * For now this is a trivial wrapper around printk() that gives the critical
 * ability to enable sets of debugging output at run-time.  In the future this
 * will almost certainly be redirected to relayfs so that it can pay a
 * substantially lower heisenberg tax.
 *
 * Callers associate the message with a bitmask and a global bitmask is
 * maintained with help from sysfs.  If any of the bits match, the message
 * is output.
 *
 * We must have efficient bit tests on i386 and it seems gcc still emits crazy
 * code for the 64bit compare.  It emits very good code for the dual unsigned
 * long tests, though, completely avoiding tests that can never pass if the
 * caller gives a constant bitmask that fills one of the longs with all 0s.  So
 * the desire is to have almost all of the calls decided on by comparing just
 * one of the longs.  This leads to having infrequently given bits that are
 * frequently matched in the high bits.
 *
 * _ERROR and _NOTICE are used for messages that always go to the console and
 * have appropriate KERN_ prefixes.  We wrap these in our function instead of
 * just calling printk() so that this can eventually make its way through
 * relayfs along with the debugging messages.  Everything else gets KERN_INFO.
 * The inline tests and macro dance give GCC the opportunity to quite cleverly
 * only emit the appropriate printk() when the caller passes in a constant
 * mask, as is almost always the case.
 *
 * All this bitmask nonsense is managed from the files under
 * /sys/fs/o2cb/logmask/.  Reading the files gives a straightforward
 * indication of which bits are allowed (allow) or denied (off/deny).
 * 	ENTRY deny
 * 	EXIT deny
 * 	TCP off
 * 	MSG off
 * 	SOCKET off
 * 	ERROR allow
 * 	NOTICE allow
 *
 * Writing changes the state of a given bit and requires a strictly formatted
 * single write() call:
 *
 * 	write(fd, "allow", 5);
 *
 * Echoing allow/deny/off string into the logmask files can flip the bits
 * on or off as expected; here is the bash script for example:
 *
 * log_mask="/sys/fs/o2cb/logmask"
 * for node in ENTRY EXIT TCP MSG SOCKET ERROR NOTICE; do
 *	echo allow >"$log_mask"/"$node"
 * done
 *
 * The debugfs.ocfs2 tool can also flip the bits with the -l option:
 *
 * debugfs.ocfs2 -l TCP allow
 */

/* for task_struct */
#include <linux/sched.h>

/* bits that are frequently given and infrequently matched in the low word */
/* NOTE: If you add a flag, you need to also update masklog.c! */
#define ML_TCP		0x0000000000000001ULL /* net cluster/tcp.c */
#define ML_MSG		0x0000000000000002ULL /* net network messages */
#define ML_SOCKET	0x0000000000000004ULL /* net socket lifetime */
#define ML_HEARTBEAT	0x0000000000000008ULL /* hb all heartbeat tracking */
#define ML_HB_BIO	0x0000000000000010ULL /* hb io tracing */
#define ML_DLMFS	0x0000000000000020ULL /* dlm user dlmfs */
#define ML_DLM		0x0000000000000040ULL /* dlm general debugging */
#define ML_DLM_DOMAIN	0x0000000000000080ULL /* dlm domain debugging */
#define ML_DLM_THREAD	0x0000000000000100ULL /* dlm domain thread */
#define ML_DLM_MASTER	0x0000000000000200ULL /* dlm master functions */
#define ML_DLM_RECOVERY	0x0000000000000400ULL /* dlm master functions */
#define ML_DLM_GLUE	0x0000000000000800ULL /* ocfs2 dlm glue layer */
#define ML_VOTE		0x0000000000001000ULL /* ocfs2 node messaging  */
#define ML_CONN		0x0000000000002000ULL /* net connection management */
#define ML_QUORUM	0x0000000000004000ULL /* net connection quorum */
#define ML_BASTS	0x0000000000008000ULL /* dlmglue asts and basts */
#define ML_CLUSTER	0x0000000000010000ULL /* cluster stack */

/* bits that are infrequently given and frequently matched in the high word */
#define ML_ERROR	0x1000000000000000ULL /* sent to KERN_ERR */
#define ML_NOTICE	0x2000000000000000ULL /* sent to KERN_NOTICE */
#define ML_KTHREAD	0x4000000000000000ULL /* kernel thread activity */

/* Bits that start out in the "allow" set. */
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
/*
 * A file may define MLOG_MASK_PREFIX before including this header to have
 * those bits OR-ed into the mask of every mlog() call it makes.
 */
#ifndef MLOG_MASK_PREFIX
#define MLOG_MASK_PREFIX 0
#endif

/*
 * When logging is disabled, force the bit test to 0 for anything other
 * than errors and notices, allowing gcc to remove the code completely.
 * When enabled, allow all masks.
 *
 * The all-ones value is spelled as a parenthesized 64-bit constant so it
 * stays correct wherever the macro is expanded, independent of surrounding
 * operators or integer promotion.
 */
#if defined(CONFIG_OCFS2_DEBUG_MASKLOG)
#define ML_ALLOWED_BITS (~0ULL)
#else
#define ML_ALLOWED_BITS (ML_ERROR|ML_NOTICE)
#endif

/* Number of mask bits; also sizes the attribute tables in masklog.c. */
#define MLOG_MAX_BITS 64

/* A MLOG_MAX_BITS-wide bit set stored as an array of unsigned longs. */
struct mlog_bits {
	unsigned long words[MLOG_MAX_BITS / BITS_PER_LONG];
};

/* The "allow" and "deny" sets tested by mlog(); declared here, defined elsewhere. */
extern struct mlog_bits mlog_and_bits, mlog_not_bits;

/*
 * Bit-set helpers operating on a struct mlog_bits lvalue (@bits) with a
 * 64-bit @mask:
 *
 *   __mlog_test_u64()  - non-zero if any bit of @mask is set in @bits
 *   __mlog_set_u64()   - set every bit of @mask in @bits
 *   __mlog_clear_u64() - clear every bit of @mask in @bits
 *   MLOG_BITS_RHS()    - static initializer for a struct mlog_bits
 *
 * With 32-bit longs the mask is split into a low and a high word; with
 * 64-bit longs a single word holds everything.  Both macro arguments are
 * fully parenthesized so that compound expressions can be passed safely.
 */
#if BITS_PER_LONG == 32

#define __mlog_test_u64(mask, bits)					\
	(((u32)((mask) & 0xffffffff) & (bits).words[0]) ||		\
	 (((u64)(mask) >> 32) & (bits).words[1]))
#define __mlog_set_u64(mask, bits) do {					\
	(bits).words[0] |= (u32)((mask) & 0xffffffff);			\
	(bits).words[1] |= (u64)(mask) >> 32;				\
} while (0)
#define __mlog_clear_u64(mask, bits) do {				\
	(bits).words[0] &= ~((u32)((mask) & 0xffffffff));		\
	(bits).words[1] &= ~((u64)(mask) >> 32);			\
} while (0)
#define MLOG_BITS_RHS(mask) {						\
	{								\
		[0] = (u32)((mask) & 0xffffffff),			\
		[1] = (u64)(mask) >> 32,				\
	}								\
}

#else /* 32bit long above, 64bit long below */

#define __mlog_test_u64(mask, bits)	((mask) & (bits).words[0])
#define __mlog_set_u64(mask, bits) do {		\
	(bits).words[0] |= (mask);		\
} while (0)
#define __mlog_clear_u64(mask, bits) do {	\
	(bits).words[0] &= ~(mask);		\
} while (0)
#define MLOG_BITS_RHS(mask) { { (mask) } }

#endif

/*
 * smp_processor_id() "helpfully" screams when called outside preemptible
 * regions in current kernels.  sles doesn't have the variants that don't
 * scream.  just do this instead of trying to guess which we're building
 * against.. *sigh*.
 *
 * The value is only a guess: put_cpu() is called before the result is
 * used, so the number may no longer describe the CPU the caller runs on
 * by the time it is printed.
 */
#define __mlog_cpu_guess ({		\
	unsigned long _cpu = get_cpu();	\
	put_cpu();			\
	_cpu;				\
})

/* In the following two macros, the whitespace after the ',' just
 * before ##args is intentional. Otherwise, gcc 2.95 will eat the
 * previous token if args expands to nothing.
 */
/*
 * printk() at @level with a "(comm,pid,cpu):function:line " prefix in
 * front of the caller's format string.  @level and @fmt must be string
 * literals because they are concatenated with the prefix.
 */
#define __mlog_printk(level, fmt, args...)				\
	printk(level "(%s,%u,%lu):%s:%d " fmt, current->comm,		\
	       task_pid_nr(current), __mlog_cpu_guess,			\
	       __PRETTY_FUNCTION__, __LINE__ , ##args)

/*
 * Conditionally log a message.  @mask is OR-ed with MLOG_MASK_PREFIX and the
 * message is emitted only if the result has a bit in ML_ALLOWED_BITS, has a
 * bit set in mlog_and_bits, and has no bit set in mlog_not_bits.
 *
 * The printk level follows the mask: ML_ERROR gets KERN_ERR plus an
 * "ERROR: " prefix, ML_NOTICE gets KERN_NOTICE, anything else KERN_INFO.
 * @fmt must be a string literal.
 */
#define mlog(mask, fmt, args...) do {					\
	u64 __m = MLOG_MASK_PREFIX | (mask);				\
	if ((__m & ML_ALLOWED_BITS) &&					\
	    __mlog_test_u64(__m, mlog_and_bits) &&			\
	    !__mlog_test_u64(__m, mlog_not_bits)) {			\
		if (__m & ML_ERROR)					\
			__mlog_printk(KERN_ERR, "ERROR: "fmt , ##args);	\
		else if (__m & ML_NOTICE)				\
			__mlog_printk(KERN_NOTICE, fmt , ##args);	\
		else __mlog_printk(KERN_INFO, fmt , ##args);		\
	}								\
} while (0)

/*
 * Log a status code at ML_ERROR, except for the values filtered out below:
 * -ERESTARTSYS, -EINTR, AOP_TRUNCATED_PAGE and -ENOSPC are silently
 * ignored.  @st is evaluated exactly once.
 */
#define mlog_errno(st) do {						\
	int _st = (st);							\
	if (_st != -ERESTARTSYS && _st != -EINTR &&			\
	    _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC)		\
		mlog(ML_ERROR, "status = %lld\n", (long long)_st);	\
} while (0)

/*
 * If @cond is true, log the text of the condition and then the caller's
 * message (@fmt, @args) at ML_ERROR, and BUG().
 *
 * The condition text is passed as a "%s" argument instead of being pasted
 * into the format string, so a '%' inside the expression (a modulo test,
 * for instance) is printed literally rather than being interpreted as a
 * conversion specifier.
 */
#define mlog_bug_on_msg(cond, fmt, args...) do {			\
	if (cond) {							\
		mlog(ML_ERROR, "bug expression: %s\n", #cond);		\
		mlog(ML_ERROR, fmt, ##args);				\
		BUG();							\
	}								\
} while (0)

#include <linux/kobject.h>
#include <linux/sysfs.h>
int mlog_sys_init(struct kset *o2cb_subsys);
void mlog_sys_shutdown(void);

#endif /* O2CLUSTER_MASKLOG_H */
Loading