From 50308d813bf26500fed671882469939fd19403a3 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 4 Nov 2010 15:14:11 +0800
Subject: ocfs2: Try to free truncate log when meeting ENOSPC in write.

Recently, one of our colleagues meet with a problem that if we
write/delete a 32mb files repeatly, we will get an ENOSPC in
the end. And the corresponding bug is 1288.
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288

The real problem is that although we have freed the clusters,
they are in truncate log and they will be summed up so that
we can free them once in a whole.

So this patch just try to resolve it. In case we see -ENOSPC
in ocfs2_write_begin_no_lock, we will check whether the truncate
log has enough clusters for our need, if yes, we will try to
flush the truncate log at that point and try again. This method
is inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.

Cc: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c |  3 +++
 fs/ocfs2/aops.c  | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ocfs2/ocfs2.h |  5 +++++
 3 files changed, 66 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 592fae5007d1..8ec418dd9e36 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5858,6 +5858,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 
 	ocfs2_journal_dirty(handle, tl_bh);
 
+	osb->truncated_clusters += num_clusters;
 bail:
 	mlog_exit(status);
 	return status;
@@ -5929,6 +5930,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 		i--;
 	}
 
+	osb->truncated_clusters = 0;
+
 bail:
 	mlog_exit(status);
 	return status;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962cb3b73..d55a10e2f300 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1627,6 +1627,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
 	return ret;
 }
 
+/*
+ * Try to flush truncate logs if we can free enough clusters from it.
+ * As for return value, "< 0" means error, "0" no space and "1" means
+ * we have freed enough spaces and let the caller try to allocate again.
+ */
+static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
+					  unsigned int needed)
+{
+	tid_t target;
+	int ret = 0;
+	unsigned int truncated_clusters;
+
+	mutex_lock(&osb->osb_tl_inode->i_mutex);
+	truncated_clusters = osb->truncated_clusters;
+	mutex_unlock(&osb->osb_tl_inode->i_mutex);
+
+	/*
+	 * Check whether we can succeed in allocating if we free
+	 * the truncate log.
+	 */
+	if (truncated_clusters < needed)
+		goto out;
+
+	ret = ocfs2_flush_truncate_log(osb);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
+		jbd2_log_wait_commit(osb->journal->j_journal, target);
+		ret = 1;
+	}
+out:
+	return ret;
+}
+
 int ocfs2_write_begin_nolock(struct file *filp,
 			     struct address_space *mapping,
 			     loff_t pos, unsigned len, unsigned flags,
@@ -1634,7 +1671,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 			     struct buffer_head *di_bh, struct page *mmap_page)
 {
 	int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
-	unsigned int clusters_to_alloc, extents_to_split;
+	unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
 	struct ocfs2_write_ctxt *wc;
 	struct inode *inode = mapping->host;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1643,7 +1680,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
 	struct ocfs2_alloc_context *meta_ac = NULL;
 	handle_t *handle;
 	struct ocfs2_extent_tree et;
+	int try_free = 1, ret1;
 
+try_again:
 	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
 	if (ret) {
 		mlog_errno(ret);
@@ -1678,6 +1717,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 		mlog_errno(ret);
 		goto out;
 	} else if (ret == 1) {
+		clusters_need = wc->w_clen;
 		ret = ocfs2_refcount_cow(inode, filp, di_bh,
 					 wc->w_cpos, wc->w_clen, UINT_MAX);
 		if (ret) {
@@ -1692,6 +1732,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 		mlog_errno(ret);
 		goto out;
 	}
+	clusters_need += clusters_to_alloc;
 
 	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
 
@@ -1814,6 +1855,22 @@ out:
 		ocfs2_free_alloc_context(data_ac);
 	if (meta_ac)
 		ocfs2_free_alloc_context(meta_ac);
+
+	if (ret == -ENOSPC && try_free) {
+		/*
+		 * Try to free some truncate log so that we can have enough
+		 * clusters to allocate.
+		 */
+		try_free = 0;
+
+		ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
+		if (ret1 == 1)
+			goto try_again;
+
+		if (ret1 < 0)
+			mlog_errno(ret1);
+	}
+
 	return ret;
 }
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 70dd3b1798f1..51cd6898e7f1 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -420,6 +420,11 @@ struct ocfs2_super
 	struct inode			*osb_tl_inode;
 	struct buffer_head		*osb_tl_bh;
 	struct delayed_work		osb_truncate_log_wq;
+	/*
+	 * How many clusters in our truncate log.
+	 * It must be protected by osb_tl_inode->i_mutex.
+	 */
+	unsigned int truncated_clusters;
 
 	struct ocfs2_node_map		osb_recovering_orphan_dirs;
 	unsigned int			*osb_orphan_wipes;
-- 
cgit v1.2.3


From 8e17d16f401f7c60908726e070bfa5cbdf31e2f3 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Fri, 19 Nov 2010 15:06:49 -0800
Subject: ocfs2/dlm: Cleanup mlogs in dlmthread.c, dlmast.c and dlmdomain.c

Add the domain name and the resource name in the mlogs.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmast.c    |  76 ++++++++++++++++++---------
 fs/ocfs2/dlm/dlmdomain.c |   2 -
 fs/ocfs2/dlm/dlmthread.c | 132 ++++++++++++++++++++++++-----------------------
 3 files changed, 120 insertions(+), 90 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f44999156839..3a3ed4bb794b 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -90,19 +90,29 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-	mlog_entry_void();
+	struct dlm_lock_resource *res;
 
 	BUG_ON(!dlm);
 	BUG_ON(!lock);
 
+	res = lock->lockres;
+
 	assert_spin_locked(&dlm->ast_lock);
+
 	if (!list_empty(&lock->ast_list)) {
-		mlog(ML_ERROR, "ast list not empty!!  pending=%d, newlevel=%d\n",
+		mlog(ML_ERROR, "%s: res %.*s, lock %u:%llu, "
+		     "AST list not empty, pending %d, newlevel %d\n",
+		     dlm->name, res->lockname.len, res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
 		     lock->ast_pending, lock->ml.type);
 		BUG();
 	}
 	if (lock->ast_pending)
-		mlog(0, "lock has an ast getting flushed right now\n");
+		mlog(0, "%s: res %.*s, lock %u:%llu, AST getting flushed\n",
+		     dlm->name, res->lockname.len, res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 
 	/* putting lock on list, add a ref */
 	dlm_lock_get(lock);
@@ -110,9 +120,10 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 	/* check to see if this ast obsoletes the bast */
 	if (dlm_should_cancel_bast(dlm, lock)) {
-		struct dlm_lock_resource *res = lock->lockres;
-		mlog(0, "%s: cancelling bast for %.*s\n",
-		     dlm->name, res->lockname.len, res->lockname.name);
+		mlog(0, "%s: res %.*s, lock %u:%llu, Cancelling BAST\n",
+		     dlm->name, res->lockname.len, res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 		lock->bast_pending = 0;
 		list_del_init(&lock->bast_list);
 		lock->ml.highest_blocked = LKM_IVMODE;
@@ -134,8 +145,6 @@ void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-	mlog_entry_void();
-
 	BUG_ON(!dlm);
 	BUG_ON(!lock);
 
@@ -147,15 +156,21 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-	mlog_entry_void();
+	struct dlm_lock_resource *res;
 
 	BUG_ON(!dlm);
 	BUG_ON(!lock);
+
 	assert_spin_locked(&dlm->ast_lock);
 
+	res = lock->lockres;
+
 	BUG_ON(!list_empty(&lock->bast_list));
 	if (lock->bast_pending)
-		mlog(0, "lock has a bast getting flushed right now\n");
+		mlog(0, "%s: res %.*s, lock %u:%llu, BAST getting flushed\n",
+		     dlm->name, res->lockname.len, res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 
 	/* putting lock on list, add a ref */
 	dlm_lock_get(lock);
@@ -167,8 +182,6 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
 void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
-	mlog_entry_void();
-
 	BUG_ON(!dlm);
 	BUG_ON(!lock);
 
@@ -213,7 +226,10 @@ void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	dlm_astlockfunc_t *fn;
 	struct dlm_lockstatus *lksb;
 
-	mlog_entry_void();
+	mlog(0, "%s: res %.*s, lock %u:%llu, Local AST\n", dlm->name,
+	     res->lockname.len, res->lockname.name,
+	     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+	     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 
 	lksb = lock->lksb;
 	fn = lock->ast;
@@ -231,7 +247,10 @@ int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	struct dlm_lockstatus *lksb;
 	int lksbflags;
 
-	mlog_entry_void();
+	mlog(0, "%s: res %.*s, lock %u:%llu, Remote AST\n", dlm->name,
+	     res->lockname.len, res->lockname.name,
+	     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+	     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)));
 
 	lksb = lock->lksb;
 	BUG_ON(lock->ml.node == dlm->node_num);
@@ -250,9 +269,14 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 {
 	dlm_bastlockfunc_t *fn = lock->bast;
 
-	mlog_entry_void();
 	BUG_ON(lock->ml.node != dlm->node_num);
 
+	mlog(0, "%s: res %.*s, lock %u:%llu, Local BAST, blocked %d\n",
+	     dlm->name, res->lockname.len, res->lockname.name,
+	     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+	     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+	     blocked_type);
+
 	(*fn)(lock->astdata, blocked_type);
 }
 
@@ -332,7 +356,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
 	/* cannot get a proxy ast message if this node owns it */
 	BUG_ON(res->owner == dlm->node_num);
 
-	mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name);
+	mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
+	     res->lockname.name);
 
 	spin_lock(&res->spinlock);
 	if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -382,8 +407,12 @@ do_ast:
 	if (past->type == DLM_AST) {
 		/* do not alter lock refcount.  switching lists. */
 		list_move_tail(&lock->list, &res->granted);
-		mlog(0, "ast: Adding to granted list... type=%d, "
-		     "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
+		mlog(0, "%s: res %.*s, lock %u:%llu, Granted type %d => %d\n",
+		     dlm->name, res->lockname.len, res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
+		     lock->ml.type, lock->ml.convert_type);
+
 		if (lock->ml.convert_type != LKM_IVMODE) {
 			lock->ml.type = lock->ml.convert_type;
 			lock->ml.convert_type = LKM_IVMODE;
@@ -426,9 +455,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	size_t veclen = 1;
 	int status;
 
-	mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n",
-		   res->lockname.len, res->lockname.name, lock->ml.node,
-		   msg_type, blocked_type);
+	mlog(0, "%s: res %.*s, to %u, type %d, blocked_type %d\n", dlm->name,
+	     res->lockname.len, res->lockname.name, lock->ml.node, msg_type,
+	     blocked_type);
 
 	memset(&past, 0, sizeof(struct dlm_proxy_ast));
 	past.node_idx = dlm->node_num;
@@ -441,7 +470,6 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	vec[0].iov_len = sizeof(struct dlm_proxy_ast);
 	vec[0].iov_base = &past;
 	if (flags & DLM_LKSB_GET_LVB) {
-		mlog(0, "returning requested LVB data\n");
 		be32_add_cpu(&past.flags, LKM_GET_LVB);
 		vec[1].iov_len = DLM_LVB_LEN;
 		vec[1].iov_base = lock->lksb->lvb;
@@ -451,8 +479,8 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
 				     lock->ml.node, &status);
 	if (ret < 0)
-		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
-		     "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
+		mlog(ML_ERROR, "%s: res %.*s, error %d send AST to node %u\n",
+		     dlm->name, res->lockname.len, res->lockname.name, ret,
 		     lock->ml.node);
 	else {
 		if (status == DLM_RECOVERING) {
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index cc2aaa96cfe5..fcc40c33489d 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -460,8 +460,6 @@ redo_bucket:
 		}
 		cond_resched_lock(&dlm->spinlock);
 		num += n;
-		mlog(0, "%s: touched %d lockreses in bucket %d "
-		     "(tot=%d)\n", dlm->name, n, i, num);
 	}
 	spin_unlock(&dlm->spinlock);
 	wake_up(&dlm->dlm_thread_wq);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 2211acf33d9b..1d6d1d22c471 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -122,15 +122,13 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
 void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 			      struct dlm_lock_resource *res)
 {
-	mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
-
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&res->spinlock);
 
 	if (__dlm_lockres_unused(res)){
 		if (list_empty(&res->purge)) {
-			mlog(0, "putting lockres %.*s:%p onto purge list\n",
-			     res->lockname.len, res->lockname.name, res);
+			mlog(0, "%s: Adding res %.*s to purge list\n",
+			     dlm->name, res->lockname.len, res->lockname.name);
 
 			res->last_used = jiffies;
 			dlm_lockres_get(res);
@@ -138,8 +136,8 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 			dlm->purge_count++;
 		}
 	} else if (!list_empty(&res->purge)) {
-		mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n",
-		     res->lockname.len, res->lockname.name, res, res->owner);
+		mlog(0, "%s: Removing res %.*s from purge list\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
 
 		list_del_init(&res->purge);
 		dlm_lockres_put(res);
@@ -150,7 +148,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
 			    struct dlm_lock_resource *res)
 {
-	mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
 	spin_lock(&dlm->spinlock);
 	spin_lock(&res->spinlock);
 
@@ -171,9 +168,8 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
 
 	master = (res->owner == dlm->node_num);
 
-
-	mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
-	     res->lockname.name, master);
+	mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name,
+	     res->lockname.len, res->lockname.name, master);
 
 	if (!master) {
 		res->state |= DLM_LOCK_RES_DROPPING_REF;
@@ -189,27 +185,25 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
 		/* clear our bit from the master's refmap, ignore errors */
 		ret = dlm_drop_lockres_ref(dlm, res);
 		if (ret < 0) {
-			mlog_errno(ret);
+			mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name,
+			     res->lockname.len, res->lockname.name, ret);
 			if (!dlm_is_host_down(ret))
 				BUG();
 		}
-		mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
-		     dlm->name, res->lockname.len, res->lockname.name, ret);
 		spin_lock(&dlm->spinlock);
 		spin_lock(&res->spinlock);
 	}
 
 	if (!list_empty(&res->purge)) {
-		mlog(0, "removing lockres %.*s:%p from purgelist, "
-		     "master = %d\n", res->lockname.len, res->lockname.name,
-		     res, master);
+		mlog(0, "%s: Removing res %.*s from purgelist, master %d\n",
+		     dlm->name, res->lockname.len, res->lockname.name, master);
 		list_del_init(&res->purge);
 		dlm_lockres_put(res);
 		dlm->purge_count--;
 	}
 
 	if (!__dlm_lockres_unused(res)) {
-		mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
+		mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
 		     dlm->name, res->lockname.len, res->lockname.name);
 		__dlm_print_one_lock_resource(res);
 		BUG();
@@ -266,10 +260,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 		unused = __dlm_lockres_unused(lockres);
 		if (!unused ||
 		    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
-			mlog(0, "lockres %s:%.*s: is in use or "
-			     "being remastered, used %d, state %d\n",
-			     dlm->name, lockres->lockname.len,
-			     lockres->lockname.name, !unused, lockres->state);
+			mlog(0, "%s: res %.*s is in use or being remastered, "
+			     "used %d, state %d\n", dlm->name,
+			     lockres->lockname.len, lockres->lockname.name,
+			     !unused, lockres->state);
 			list_move_tail(&dlm->purge_list, &lockres->purge);
 			spin_unlock(&lockres->spinlock);
 			continue;
@@ -296,15 +290,12 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
 	struct list_head *head;
 	int can_grant = 1;
 
-	//mlog(0, "res->lockname.len=%d\n", res->lockname.len);
-	//mlog(0, "res->lockname.name=%p\n", res->lockname.name);
-	//mlog(0, "shuffle res %.*s\n", res->lockname.len,
-	//	  res->lockname.name);
-
-	/* because this function is called with the lockres
+	/*
+	 * Because this function is called with the lockres
 	 * spinlock, and because we know that it is not migrating/
 	 * recovering/in-progress, it is fine to reserve asts and
-	 * basts right before queueing them all throughout */
+	 * basts right before queueing them all throughout
+	 */
 	assert_spin_locked(&dlm->ast_lock);
 	assert_spin_locked(&res->spinlock);
 	BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
@@ -314,13 +305,13 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
 converting:
 	if (list_empty(&res->converting))
 		goto blocked;
-	mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len,
-	     res->lockname.name);
+	mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name,
+	     res->lockname.len, res->lockname.name);
 
 	target = list_entry(res->converting.next, struct dlm_lock, list);
 	if (target->ml.convert_type == LKM_IVMODE) {
-		mlog(ML_ERROR, "%.*s: converting a lock with no "
-		     "convert_type!\n", res->lockname.len, res->lockname.name);
+		mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n",
+		     dlm->name, res->lockname.len, res->lockname.name);
 		BUG();
 	}
 	head = &res->granted;
@@ -365,9 +356,12 @@ converting:
 		spin_lock(&target->spinlock);
 		BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
 
-		mlog(0, "calling ast for converting lock: %.*s, have: %d, "
-		     "granting: %d, node: %u\n", res->lockname.len,
-		     res->lockname.name, target->ml.type,
+		mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type "
+		     "%d => %d, node %u\n", dlm->name, res->lockname.len,
+		     res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
+		     target->ml.type,
 		     target->ml.convert_type, target->ml.node);
 
 		target->ml.type = target->ml.convert_type;
@@ -428,11 +422,14 @@ blocked:
 		spin_lock(&target->spinlock);
 		BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
 
-		mlog(0, "calling ast for blocked lock: %.*s, granting: %d, "
-		     "node: %u\n", res->lockname.len, res->lockname.name,
+		mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, "
+		     "node %u\n", dlm->name, res->lockname.len,
+		     res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)),
 		     target->ml.type, target->ml.node);
 
-		// target->ml.type is already correct
+		/* target->ml.type is already correct */
 		list_move_tail(&target->list, &res->granted);
 
 		BUG_ON(!target->lksb);
@@ -453,7 +450,6 @@ leave:
 /* must have NO locks when calling this with res !=NULL * */
 void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 {
-	mlog_entry("dlm=%p, res=%p\n", dlm, res);
 	if (res) {
 		spin_lock(&dlm->spinlock);
 		spin_lock(&res->spinlock);
@@ -466,8 +462,6 @@ void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 
 void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 {
-	mlog_entry("dlm=%p, res=%p\n", dlm, res);
-
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&res->spinlock);
 
@@ -484,13 +478,16 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 			res->state |= DLM_LOCK_RES_DIRTY;
 		}
 	}
+
+	mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len,
+	     res->lockname.name);
 }
 
 
 /* Launch the NM thread for the mounted volume */
 int dlm_launch_thread(struct dlm_ctxt *dlm)
 {
-	mlog(0, "starting dlm thread...\n");
+	mlog(0, "Starting dlm_thread...\n");
 
 	dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
 	if (IS_ERR(dlm->dlm_thread_task)) {
@@ -505,7 +502,7 @@ int dlm_launch_thread(struct dlm_ctxt *dlm)
 void dlm_complete_thread(struct dlm_ctxt *dlm)
 {
 	if (dlm->dlm_thread_task) {
-		mlog(ML_KTHREAD, "waiting for dlm thread to exit\n");
+		mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n");
 		kthread_stop(dlm->dlm_thread_task);
 		dlm->dlm_thread_task = NULL;
 	}
@@ -536,7 +533,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
 		/* get an extra ref on lock */
 		dlm_lock_get(lock);
 		res = lock->lockres;
-		mlog(0, "delivering an ast for this lockres\n");
+		mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, "
+		     "node %u\n", dlm->name, res->lockname.len,
+		     res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+		     lock->ml.type, lock->ml.node);
 
 		BUG_ON(!lock->ast_pending);
 
@@ -557,9 +559,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
 		/* possible that another ast was queued while
 		 * we were delivering the last one */
 		if (!list_empty(&lock->ast_list)) {
-			mlog(0, "aha another ast got queued while "
-			     "we were finishing the last one.  will "
-			     "keep the ast_pending flag set.\n");
+			mlog(0, "%s: res %.*s, AST queued while flushing last "
+			     "one\n", dlm->name, res->lockname.len,
+			     res->lockname.name);
 		} else
 			lock->ast_pending = 0;
 
@@ -590,8 +592,12 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
 		dlm_lock_put(lock);
 		spin_unlock(&dlm->ast_lock);
 
-		mlog(0, "delivering a bast for this lockres "
-		     "(blocked = %d\n", hi);
+		mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, "
+		     "blocked %d, node %u\n",
+		     dlm->name, res->lockname.len, res->lockname.name,
+		     dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+		     dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+		     hi, lock->ml.node);
 
 		if (lock->ml.node != dlm->node_num) {
 			ret = dlm_send_proxy_bast(dlm, res, lock, hi);
@@ -605,9 +611,9 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
 		/* possible that another bast was queued while
 		 * we were delivering the last one */
 		if (!list_empty(&lock->bast_list)) {
-			mlog(0, "aha another bast got queued while "
-			     "we were finishing the last one.  will "
-			     "keep the bast_pending flag set.\n");
+			mlog(0, "%s: res %.*s, BAST queued while flushing last "
+			     "one\n", dlm->name, res->lockname.len,
+			     res->lockname.name);
 		} else
 			lock->bast_pending = 0;
 
@@ -675,11 +681,12 @@ static int dlm_thread(void *data)
 			spin_lock(&res->spinlock);
 			if (res->owner != dlm->node_num) {
 				__dlm_print_one_lock_resource(res);
-				mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n",
-				     res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no",
-				     res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no",
-				     res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no",
-				     res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
+				mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d,"
+				     " dirty %d\n", dlm->name,
+				     !!(res->state & DLM_LOCK_RES_IN_PROGRESS),
+				     !!(res->state & DLM_LOCK_RES_MIGRATING),
+				     !!(res->state & DLM_LOCK_RES_RECOVERING),
+				     !!(res->state & DLM_LOCK_RES_DIRTY));
 			}
 			BUG_ON(res->owner != dlm->node_num);
 
@@ -693,8 +700,8 @@ static int dlm_thread(void *data)
 				res->state &= ~DLM_LOCK_RES_DIRTY;
 				spin_unlock(&res->spinlock);
 				spin_unlock(&dlm->ast_lock);
-				mlog(0, "delaying list shuffling for in-"
-				     "progress lockres %.*s, state=%d\n",
+				mlog(0, "%s: res %.*s, inprogress, delay list "
+				     "shuffle, state %d\n", dlm->name,
 				     res->lockname.len, res->lockname.name,
 				     res->state);
 				delay = 1;
@@ -706,10 +713,6 @@ static int dlm_thread(void *data)
 			 * spinlock and do NOT have the dlm lock.
 			 * safe to reserve/queue asts and run the lists. */
 
-			mlog(0, "calling dlm_shuffle_lists with dlm=%s, "
-			     "res=%.*s\n", dlm->name,
-			     res->lockname.len, res->lockname.name);
-
 			/* called while holding lockres lock */
 			dlm_shuffle_lists(dlm, res);
 			res->state &= ~DLM_LOCK_RES_DIRTY;
@@ -733,7 +736,8 @@ in_progress:
 			/* unlikely, but we may need to give time to
 			 * other tasks */
 			if (!--n) {
-				mlog(0, "throttling dlm_thread\n");
+				mlog(0, "%s: Throttling dlm thread\n",
+				     dlm->name);
 				break;
 			}
 		}
-- 
cgit v1.2.3


From 66f4500573fe5a1b455e5f7b30068a623a94117f Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Wed, 8 Dec 2010 20:34:39 +0800
Subject: ocfs2/dlm: make existing convertion precedent over new lock

Make existing convertion precedent over new lock. It makes o2dlm locking more
like fair locking.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmlock.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 69cf369961c4..7009292aac5a 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -106,6 +106,9 @@ static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
 
 		if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
 			return 0;
+		if (!dlm_lock_compatible(tmplock->ml.convert_type,
+					 lock->ml.type))
+			return 0;
 	}
 
 	return 1;
-- 
cgit v1.2.3


From 2b190ce9bf923fb1fd8ccff18e9bab72da40da06 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 14 Dec 2010 14:14:27 -0800
Subject: ocfs2/cluster: Pin the remote node item in configfs

o2net pins the node item of the remote node in configfs before initiating
the connection. It is unpinned on disconnect. This is to prevent the node
item from being unlinked while it is still in use.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/tcp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9aa426e42123..92de96cd247d 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -355,6 +355,7 @@ static void sc_kref_release(struct kref *kref)
 		sc->sc_sock = NULL;
 	}
 
+	o2nm_undepend_item(&sc->sc_node->nd_item);
 	o2nm_node_put(sc->sc_node);
 	sc->sc_node = NULL;
 
@@ -376,6 +377,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
 {
 	struct o2net_sock_container *sc, *ret = NULL;
 	struct page *page = NULL;
+	int status = 0;
 
 	page = alloc_page(GFP_NOFS);
 	sc = kzalloc(sizeof(*sc), GFP_NOFS);
@@ -386,6 +388,13 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
 	o2nm_node_get(node);
 	sc->sc_node = node;
 
+	/* pin the node item of the remote node */
+	status = o2nm_depend_item(&node->nd_item);
+	if (status) {
+		mlog_errno(status);
+		o2nm_node_put(node);
+		goto out;
+	}
 	INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
 	INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
 	INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
-- 
cgit v1.2.3


From ffee223a9af4c5124beb56fa5c84132949923d23 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 14 Dec 2010 14:14:28 -0800
Subject: ocfs2/cluster: Remove dropped region from o2hb quorum region bitmap

Patch removes a dropped region from the quorum region bitmap maintained by o2hb.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9f26ac9be2a4..9f21dd785364 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -2009,6 +2009,7 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
 	if (o2hb_global_heartbeat_active()) {
 		clear_bit(reg->hr_region_num, o2hb_region_bitmap);
 		clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
+		clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
 	}
 	hb_task = reg->hr_task;
 	reg->hr_task = NULL;
-- 
cgit v1.2.3


From 58a3158a5d17ddf4894db9e8ccaf92093ff8e42e Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 14 Dec 2010 14:14:29 -0800
Subject: ocfs2/cluster: Pin/unpin o2hb regions

This patch adds support for pinning o2hb regions in configfs. Pinning disallows
a region to be cleanly stopped as long as it has an active dependent user
(read o2dlm).

In local heartbeat mode, the region uuid matching the domain name is pinned as
long as the o2dlm domain is active.

In global heartbeat mode, all regions are pinned as long as there is atleast
one dependent user and the region count is 3 or less. All regions are unpinned
if the number of dependent users is zero or region count is greater than 3.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 216 +++++++++++++++++++++++++++++++++++--------
 fs/ocfs2/dlm/dlmdomain.c     |   8 +-
 2 files changed, 182 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 9f21dd785364..ad2e41d6879d 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -132,6 +132,33 @@ char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
 unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
 unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
 
+/*
+ * o2hb_dependent_users tracks the number of registered callbacks that depend
+ * on heartbeat. o2net and o2dlm are two entities that register this callback.
+ * However only o2dlm depends on the heartbeat. It does not want the heartbeat
+ * to stop while a dlm domain is still active.
+ */
+unsigned int o2hb_dependent_users;
+
+/*
+ * In global heartbeat mode, all regions are pinned if there are one or more
+ * dependent users and the quorum region count is <= O2HB_PIN_CUT_OFF. All
+ * regions are unpinned if the region count exceeds the cut off or the number
+ * of dependent users falls to zero.
+ */
+#define O2HB_PIN_CUT_OFF		3
+
+/*
+ * In local heartbeat mode, we assume the dlm domain name to be the same as
+ * region uuid. This is true for domains created for the file system but not
+ * necessarily true for userdlm domains. This is a known limitation.
+ *
+ * In global heartbeat mode, we pin/unpin all o2hb regions. This solution
+ * works for both file system and userdlm domains.
+ */
+static int o2hb_region_pin(const char *region_uuid);
+static void o2hb_region_unpin(const char *region_uuid);
+
 /* Only sets a new threshold if there are no active regions.
  *
  * No locking or otherwise interesting code is required for reading
@@ -186,7 +213,9 @@ struct o2hb_region {
 	struct config_item	hr_item;
 
 	struct list_head	hr_all_item;
-	unsigned		hr_unclean_stop:1;
+	unsigned		hr_unclean_stop:1,
+				hr_item_pinned:1,
+				hr_item_dropped:1;
 
 	/* protected by the hr_callback_sem */
 	struct task_struct 	*hr_task;
@@ -702,6 +731,14 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
 	       config_item_name(&reg->hr_item));
 
 	set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
+
+	/*
+	 * If global heartbeat active, unpin all regions if the
+	 * region count > CUT_OFF
+	 */
+	if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+			   O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
+		o2hb_region_unpin(NULL);
 }
 
 static int o2hb_check_slot(struct o2hb_region *reg,
@@ -1316,6 +1353,8 @@ int o2hb_init(void)
 	memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
 	memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
 
+	o2hb_dependent_users = 0;
+
 	return o2hb_debug_init();
 }
 
@@ -2003,16 +2042,20 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
 {
 	struct task_struct *hb_task;
 	struct o2hb_region *reg = to_o2hb_region(item);
+	int quorum_region = 0;
 
 	/* stop the thread when the user removes the region dir */
 	spin_lock(&o2hb_live_lock);
 	if (o2hb_global_heartbeat_active()) {
 		clear_bit(reg->hr_region_num, o2hb_region_bitmap);
 		clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
+		if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
+			quorum_region = 1;
 		clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
 	}
 	hb_task = reg->hr_task;
 	reg->hr_task = NULL;
+	reg->hr_item_dropped = 1;
 	spin_unlock(&o2hb_live_lock);
 
 	if (hb_task)
@@ -2030,7 +2073,27 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
 	if (o2hb_global_heartbeat_active())
 		printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
 		       config_item_name(&reg->hr_item));
+
 	config_item_put(item);
+
+	if (!o2hb_global_heartbeat_active() || !quorum_region)
+		return;
+
+	/*
+	 * If global heartbeat active and there are dependent users,
+	 * pin all regions if quorum region count <= CUT_OFF
+	 */
+	spin_lock(&o2hb_live_lock);
+
+	if (!o2hb_dependent_users)
+		goto unlock;
+
+	if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+			   O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+		o2hb_region_pin(NULL);
+
+unlock:
+	spin_unlock(&o2hb_live_lock);
 }
 
 struct o2hb_heartbeat_group_attribute {
@@ -2216,63 +2279,138 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
 }
 EXPORT_SYMBOL_GPL(o2hb_setup_callback);
 
-static struct o2hb_region *o2hb_find_region(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only pin the matching region. In global we pin all the active
+ * regions.
+ */
+static int o2hb_region_pin(const char *region_uuid)
 {
-	struct o2hb_region *p, *reg = NULL;
+	int ret = 0, found = 0;
+	struct o2hb_region *reg;
+	char *uuid;
 
 	assert_spin_locked(&o2hb_live_lock);
 
-	list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
-		if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
-			reg = p;
-			break;
+	list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+		uuid = config_item_name(&reg->hr_item);
+
+		/* local heartbeat */
+		if (region_uuid) {
+			if (strcmp(region_uuid, uuid))
+				continue;
+			found = 1;
 		}
+
+		if (reg->hr_item_pinned || reg->hr_item_dropped)
+			goto skip_pin;
+
+		/* Ignore ENOENT only for local hb (userdlm domain) */
+		ret = o2nm_depend_item(&reg->hr_item);
+		if (!ret) {
+			mlog(ML_CLUSTER, "Pin region %s\n", uuid);
+			reg->hr_item_pinned = 1;
+		} else {
+			if (ret == -ENOENT && found)
+				ret = 0;
+			else {
+				mlog(ML_ERROR, "Pin region %s fails with %d\n",
+				     uuid, ret);
+				break;
+			}
+		}
+skip_pin:
+		if (found)
+			break;
 	}
 
-	return reg;
+	return ret;
 }
 
-static int o2hb_region_get(const char *region_uuid)
+/*
+ * In local heartbeat mode, region_uuid passed matches the dlm domain name.
+ * In global heartbeat mode, region_uuid passed is NULL.
+ *
+ * In local, we only unpin the matching region. In global we unpin all the
+ * active regions.
+ */
+static void o2hb_region_unpin(const char *region_uuid)
 {
-	int ret = 0;
 	struct o2hb_region *reg;
+	char *uuid;
+	int found = 0;
 
-	spin_lock(&o2hb_live_lock);
+	assert_spin_locked(&o2hb_live_lock);
 
-	reg = o2hb_find_region(region_uuid);
-	if (!reg)
-		ret = -ENOENT;
-	spin_unlock(&o2hb_live_lock);
+	list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
+		uuid = config_item_name(&reg->hr_item);
+		if (region_uuid) {
+			if (strcmp(region_uuid, uuid))
+				continue;
+			found = 1;
+		}
 
-	if (ret)
-		goto out;
+		if (reg->hr_item_pinned) {
+			mlog(ML_CLUSTER, "Unpin region %s\n", uuid);
+			o2nm_undepend_item(&reg->hr_item);
+			reg->hr_item_pinned = 0;
+		}
+		if (found)
+			break;
+	}
+}
 
-	ret = o2nm_depend_this_node();
-	if (ret)
-		goto out;
+static int o2hb_region_inc_user(const char *region_uuid)
+{
+	int ret = 0;
 
-	ret = o2nm_depend_item(&reg->hr_item);
-	if (ret)
-		o2nm_undepend_this_node();
+	spin_lock(&o2hb_live_lock);
 
-out:
+	/* local heartbeat */
+	if (!o2hb_global_heartbeat_active()) {
+	    ret = o2hb_region_pin(region_uuid);
+	    goto unlock;
+	}
+
+	/*
+	 * if global heartbeat active and this is the first dependent user,
+	 * pin all regions if quorum region count <= CUT_OFF
+	 */
+	o2hb_dependent_users++;
+	if (o2hb_dependent_users > 1)
+		goto unlock;
+
+	if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
+			   O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF)
+		ret = o2hb_region_pin(NULL);
+
+unlock:
+	spin_unlock(&o2hb_live_lock);
 	return ret;
 }
 
-static void o2hb_region_put(const char *region_uuid)
+void o2hb_region_dec_user(const char *region_uuid)
 {
-	struct o2hb_region *reg;
-
 	spin_lock(&o2hb_live_lock);
 
-	reg = o2hb_find_region(region_uuid);
+	/* local heartbeat */
+	if (!o2hb_global_heartbeat_active()) {
+	    o2hb_region_unpin(region_uuid);
+	    goto unlock;
+	}
 
-	spin_unlock(&o2hb_live_lock);
+	/*
+	 * if global heartbeat active and there are no dependent users,
+	 * unpin all quorum regions
+	 */
+	o2hb_dependent_users--;
+	if (!o2hb_dependent_users)
+		o2hb_region_unpin(NULL);
 
-	if (reg) {
-		o2nm_undepend_item(&reg->hr_item);
-		o2nm_undepend_this_node();
-	}
+unlock:
+	spin_unlock(&o2hb_live_lock);
 }
 
 int o2hb_register_callback(const char *region_uuid,
@@ -2293,9 +2431,11 @@ int o2hb_register_callback(const char *region_uuid,
 	}
 
 	if (region_uuid) {
-		ret = o2hb_region_get(region_uuid);
-		if (ret)
+		ret = o2hb_region_inc_user(region_uuid);
+		if (ret) {
+			mlog_errno(ret);
 			goto out;
+		}
 	}
 
 	down_write(&o2hb_callback_sem);
@@ -2313,7 +2453,7 @@ int o2hb_register_callback(const char *region_uuid,
 	up_write(&o2hb_callback_sem);
 	ret = 0;
 out:
-	mlog(ML_HEARTBEAT, "returning %d on behalf of %p for funcs %p\n",
+	mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
 	     ret, __builtin_return_address(0), hc);
 	return ret;
 }
@@ -2324,7 +2464,7 @@ void o2hb_unregister_callback(const char *region_uuid,
 {
 	BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
 
-	mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
+	mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
 	     __builtin_return_address(0), hc);
 
 	/* XXX Can this happen _with_ a region reference? */
@@ -2332,7 +2472,7 @@ void o2hb_unregister_callback(const char *region_uuid,
 		return;
 
 	if (region_uuid)
-		o2hb_region_put(region_uuid);
+		o2hb_region_dec_user(region_uuid);
 
 	down_write(&o2hb_callback_sem);
 
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index fcc40c33489d..7e38a072d720 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1659,8 +1659,8 @@ bail:
 
 static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
 {
-	o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
-	o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
+	o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_up);
+	o2hb_unregister_callback(dlm->name, &dlm->dlm_hb_down);
 	o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
 }
 
@@ -1672,13 +1672,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
 
 	o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
 			    dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
-	status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
+	status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_down);
 	if (status)
 		goto bail;
 
 	o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
 			    dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
-	status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
+	status = o2hb_register_callback(dlm->name, &dlm->dlm_hb_up);
 	if (status)
 		goto bail;
 
-- 
cgit v1.2.3


From cb0586bd4c77c531fe0be4ae860ec642450eeda5 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 14 Dec 2010 14:14:30 -0800
Subject: ocfs2/cluster: Show pin state for each o2hb region

This patch adds a per o2hb region debugfs file that shows whether that region
is pinned or not.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index ad2e41d6879d..3722e3850a1f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -82,6 +82,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
 #define O2HB_DB_TYPE_REGION_LIVENODES	4
 #define O2HB_DB_TYPE_REGION_NUMBER	5
 #define O2HB_DB_TYPE_REGION_ELAPSED_TIME	6
+#define O2HB_DB_TYPE_REGION_PINNED	7
 struct o2hb_debug_buf {
 	int db_type;
 	int db_size;
@@ -101,6 +102,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
 #define O2HB_DEBUG_FAILEDREGIONS	"failed_regions"
 #define O2HB_DEBUG_REGION_NUMBER	"num"
 #define O2HB_DEBUG_REGION_ELAPSED_TIME	"elapsed_time_in_ms"
+#define O2HB_DEBUG_REGION_PINNED	"pinned"
 
 static struct dentry *o2hb_debug_dir;
 static struct dentry *o2hb_debug_livenodes;
@@ -241,9 +243,11 @@ struct o2hb_region {
 	struct dentry		*hr_debug_livenodes;
 	struct dentry		*hr_debug_regnum;
 	struct dentry		*hr_debug_elapsed_time;
+	struct dentry		*hr_debug_pinned;
 	struct o2hb_debug_buf	*hr_db_livenodes;
 	struct o2hb_debug_buf	*hr_db_regnum;
 	struct o2hb_debug_buf	*hr_db_elapsed_time;
+	struct o2hb_debug_buf	*hr_db_pinned;
 
 	/* let the person setting up hb wait for it to return until it
 	 * has reached a 'steady' state.  This will be fixed when we have
@@ -1180,6 +1184,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
 						 reg->hr_last_timeout_start));
 		goto done;
 
+	case O2HB_DB_TYPE_REGION_PINNED:
+		reg = (struct o2hb_region *)db->db_data;
+		out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
+				!!reg->hr_item_pinned);
+		goto done;
+
 	default:
 		goto done;
 	}
@@ -1424,6 +1434,7 @@ static void o2hb_region_release(struct config_item *item)
 	debugfs_remove(reg->hr_debug_livenodes);
 	debugfs_remove(reg->hr_debug_regnum);
 	debugfs_remove(reg->hr_debug_elapsed_time);
+	debugfs_remove(reg->hr_debug_pinned);
 	debugfs_remove(reg->hr_debug_dir);
 
 	spin_lock(&o2hb_live_lock);
@@ -1988,6 +1999,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
 		goto bail;
 	}
 
+	reg->hr_debug_pinned =
+			o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
+					  reg->hr_debug_dir,
+					  &(reg->hr_db_pinned),
+					  sizeof(*(reg->hr_db_pinned)),
+					  O2HB_DB_TYPE_REGION_PINNED,
+					  0, 0, reg);
+	if (!reg->hr_debug_pinned) {
+		mlog_errno(ret);
+		goto bail;
+	}
+
 	ret = 0;
 bail:
 	return ret;
-- 
cgit v1.2.3


From cfc069d3fa24d7c6357e3b731e97f94db495bf0c Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Tue, 14 Dec 2010 14:14:31 -0800
Subject: ocfs2/cluster: Pin the local node when o2hb thread starts

The patch pins the node item of the local node when the o2hb thread
starts and unpins on stop.

An earlier patch pinned the node item of the remote node on o2net
connect and unpinned on disconnect.

Signed-off-by Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 3722e3850a1f..8b50c1ad7a69 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1083,6 +1083,9 @@ static int o2hb_thread(void *data)
 
 	set_user_nice(current, -20);
 
+	/* Pin node */
+	o2nm_depend_this_node();
+
 	while (!kthread_should_stop() && !reg->hr_unclean_stop) {
 		/* We track the time spent inside
 		 * o2hb_do_disk_heartbeat so that we avoid more than
@@ -1132,6 +1135,9 @@ static int o2hb_thread(void *data)
 		mlog_errno(ret);
 	}
 
+	/* Unpin node */
+	o2nm_undepend_this_node();
+
 	mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
 
 	return 0;
-- 
cgit v1.2.3


From 1e6d9153df27923649976554d034a69ac7b28f95 Mon Sep 17 00:00:00 2001
From: Tao Ma <boyu.mt@taobao.com>
Date: Mon, 20 Dec 2010 16:21:11 +0800
Subject: ocfs2: Release buffer_head in case of error in ocfs2_double_lock.

In ocfs2_double_lock, when ocfs2_inode_lock for inode1 fails, we
just unlock inode2 and return without releasing buffer we get from
inode_lock(inode2). The good thing is that it is freed by the only
caller ocfs2_rename when it exits.

But I don't think this is a right way for error handling. We should
free the buffer_head we get in ocfs2_double_lock before exit so that
the caller doesn't need to take care of it.

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/namei.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ff5744e1e36f..ca35f81a13bb 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1017,8 +1017,11 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 		 * An error return must mean that no cluster locks
 		 * were held on function exit.
 		 */
-		if (oi1->ip_blkno != oi2->ip_blkno)
+		if (oi1->ip_blkno != oi2->ip_blkno) {
 			ocfs2_inode_unlock(inode2, 1);
+			brelse(*bh2);
+			*bh2 = NULL;
+		}
 
 		if (status != -ENOENT)
 			mlog_errno(status);
-- 
cgit v1.2.3


From 02bd9c394ef64a16a313eb4d968a94b7000c5d00 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 20 Dec 2010 16:34:59 -0800
Subject: ocfs2/dlm: Cleanup dlmdebug.c

Remove struct debug_buffer in dlmdebug.c/h.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmdebug.c | 178 ++++++++++++++++++------------------------------
 fs/ocfs2/dlm/dlmdebug.h |   5 --
 2 files changed, 66 insertions(+), 117 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 272ec8631a51..77199ca4409a 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -370,92 +370,46 @@ static void dlm_debug_get(struct dlm_debug_ctxt *dc)
 	kref_get(&dc->debug_refcnt);
 }
 
-static struct debug_buffer *debug_buffer_allocate(void)
+static int debug_release(struct inode *inode, struct file *file)
 {
-	struct debug_buffer *db = NULL;
-
-	db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
-	if (!db)
-		goto bail;
-
-	db->len = PAGE_SIZE;
-	db->buf = kmalloc(db->len, GFP_KERNEL);
-	if (!db->buf)
-		goto bail;
-
-	return db;
-bail:
-	kfree(db);
-	return NULL;
-}
-
-static ssize_t debug_buffer_read(struct file *file, char __user *buf,
-				 size_t nbytes, loff_t *ppos)
-{
-	struct debug_buffer *db = file->private_data;
-
-	return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len);
-}
-
-static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
-{
-	struct debug_buffer *db = file->private_data;
-	loff_t new = -1;
-
-	switch (whence) {
-	case 0:
-		new = off;
-		break;
-	case 1:
-		new = file->f_pos + off;
-		break;
-	}
-
-	if (new < 0 || new > db->len)
-		return -EINVAL;
-
-	return (file->f_pos = new);
+	free_page((unsigned long)file->private_data);
+	return 0;
 }
 
-static int debug_buffer_release(struct inode *inode, struct file *file)
+static ssize_t debug_read(struct file *file, char __user *buf,
+			  size_t nbytes, loff_t *ppos)
 {
-	struct debug_buffer *db = file->private_data;
-
-	if (db)
-		kfree(db->buf);
-	kfree(db);
-
-	return 0;
+	return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
+				       i_size_read(file->f_mapping->host));
 }
 /* end - util funcs */
 
 /* begin - purge list funcs */
-static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
 {
 	struct dlm_lock_resource *res;
 	int out = 0;
 	unsigned long total = 0;
 
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Dumping Purgelist for Domain: %s\n", dlm->name);
 
 	spin_lock(&dlm->spinlock);
 	list_for_each_entry(res, &dlm->purge_list, purge) {
 		++total;
-		if (db->len - out < 100)
+		if (len - out < 100)
 			continue;
 		spin_lock(&res->spinlock);
 		out += stringify_lockname(res->lockname.name,
 					  res->lockname.len,
-					  db->buf + out, db->len - out);
-		out += snprintf(db->buf + out, db->len - out, "\t%ld\n",
+					  buf + out, len - out);
+		out += snprintf(buf + out, len - out, "\t%ld\n",
 				(jiffies - res->last_used)/HZ);
 		spin_unlock(&res->spinlock);
 	}
 	spin_unlock(&dlm->spinlock);
 
-	out += snprintf(db->buf + out, db->len - out,
-			"Total on list: %ld\n", total);
+	out += snprintf(buf + out, len - out, "Total on list: %ld\n", total);
 
 	return out;
 }
@@ -463,15 +417,15 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 static int debug_purgelist_open(struct inode *inode, struct file *file)
 {
 	struct dlm_ctxt *dlm = inode->i_private;
-	struct debug_buffer *db;
+	char *buf = NULL;
 
-	db = debug_buffer_allocate();
-	if (!db)
+	buf = (char *) get_zeroed_page(GFP_NOFS);
+	if (!buf)
 		goto bail;
 
-	db->len = debug_purgelist_print(dlm, db);
+	i_size_write(inode, debug_purgelist_print(dlm, buf, PAGE_SIZE - 1));
 
-	file->private_data = db;
+	file->private_data = buf;
 
 	return 0;
 bail:
@@ -480,14 +434,14 @@ bail:
 
 static const struct file_operations debug_purgelist_fops = {
 	.open =		debug_purgelist_open,
-	.release =	debug_buffer_release,
-	.read =		debug_buffer_read,
-	.llseek =	debug_buffer_llseek,
+	.release =	debug_release,
+	.read =		debug_read,
+	.llseek =	generic_file_llseek,
 };
 /* end - purge list funcs */
 
 /* begin - debug mle funcs */
-static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
 {
 	struct dlm_master_list_entry *mle;
 	struct hlist_head *bucket;
@@ -495,7 +449,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 	int i, out = 0;
 	unsigned long total = 0, longest = 0, bucket_count = 0;
 
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Dumping MLEs for Domain: %s\n", dlm->name);
 
 	spin_lock(&dlm->master_lock);
@@ -506,16 +460,16 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 					  master_hash_node);
 			++total;
 			++bucket_count;
-			if (db->len - out < 200)
+			if (len - out < 200)
 				continue;
-			out += dump_mle(mle, db->buf + out, db->len - out);
+			out += dump_mle(mle, buf + out, len - out);
 		}
 		longest = max(longest, bucket_count);
 		bucket_count = 0;
 	}
 	spin_unlock(&dlm->master_lock);
 
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Total: %ld, Longest: %ld\n", total, longest);
 	return out;
 }
@@ -523,15 +477,15 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 static int debug_mle_open(struct inode *inode, struct file *file)
 {
 	struct dlm_ctxt *dlm = inode->i_private;
-	struct debug_buffer *db;
+	char *buf = NULL;
 
-	db = debug_buffer_allocate();
-	if (!db)
+	buf = (char *) get_zeroed_page(GFP_NOFS);
+	if (!buf)
 		goto bail;
 
-	db->len = debug_mle_print(dlm, db);
+	i_size_write(inode, debug_mle_print(dlm, buf, PAGE_SIZE - 1));
 
-	file->private_data = db;
+	file->private_data = buf;
 
 	return 0;
 bail:
@@ -540,9 +494,9 @@ bail:
 
 static const struct file_operations debug_mle_fops = {
 	.open =		debug_mle_open,
-	.release =	debug_buffer_release,
-	.read =		debug_buffer_read,
-	.llseek =	debug_buffer_llseek,
+	.release =	debug_release,
+	.read =		debug_read,
+	.llseek =	generic_file_llseek,
 };
 
 /* end - debug mle funcs */
@@ -757,7 +711,7 @@ static const struct file_operations debug_lockres_fops = {
 /* end - debug lockres funcs */
 
 /* begin - debug state funcs */
-static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
+static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
 {
 	int out = 0;
 	struct dlm_reco_node_data *node;
@@ -781,35 +735,35 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 	}
 
 	/* Domain: xxxxxxxxxx  Key: 0xdfbac769 */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Domain: %s  Key: 0x%08x  Protocol: %d.%d\n",
 			dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
 			dlm->dlm_locking_proto.pv_minor);
 
 	/* Thread Pid: xxx  Node: xxx  State: xxxxx */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Thread Pid: %d  Node: %d  State: %s\n",
 			dlm->dlm_thread_task->pid, dlm->node_num, state);
 
 	/* Number of Joins: xxx  Joining Node: xxx */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Number of Joins: %d  Joining Node: %d\n",
 			dlm->num_joins, dlm->joining_node);
 
 	/* Domain Map: xx xx xx */
-	out += snprintf(db->buf + out, db->len - out, "Domain Map: ");
+	out += snprintf(buf + out, len - out, "Domain Map: ");
 	out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
-				 db->buf + out, db->len - out);
-	out += snprintf(db->buf + out, db->len - out, "\n");
+				 buf + out, len - out);
+	out += snprintf(buf + out, len - out, "\n");
 
 	/* Live Map: xx xx xx */
-	out += snprintf(db->buf + out, db->len - out, "Live Map: ");
+	out += snprintf(buf + out, len - out, "Live Map: ");
 	out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
-				 db->buf + out, db->len - out);
-	out += snprintf(db->buf + out, db->len - out, "\n");
+				 buf + out, len - out);
+	out += snprintf(buf + out, len - out, "\n");
 
 	/* Lock Resources: xxx (xxx) */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Lock Resources: %d (%d)\n",
 			atomic_read(&dlm->res_cur_count),
 			atomic_read(&dlm->res_tot_count));
@@ -821,29 +775,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 		cur_mles += atomic_read(&dlm->mle_cur_count[i]);
 
 	/* MLEs: xxx (xxx) */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"MLEs: %d (%d)\n", cur_mles, tot_mles);
 
 	/*  Blocking: xxx (xxx) */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"  Blocking: %d (%d)\n",
 			atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
 			atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
 
 	/*  Mastery: xxx (xxx) */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"  Mastery: %d (%d)\n",
 			atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
 			atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
 
 	/*  Migration: xxx (xxx) */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"  Migration: %d (%d)\n",
 			atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
 			atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
 
 	/* Lists: Dirty=Empty  Purge=InUse  PendingASTs=Empty  ... */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Lists: Dirty=%s  Purge=%s  PendingASTs=%s  "
 			"PendingBASTs=%s\n",
 			(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
@@ -852,12 +806,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 			(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
 
 	/* Purge Count: xxx  Refs: xxx */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Purge Count: %d  Refs: %d\n", dlm->purge_count,
 			atomic_read(&dlm->dlm_refs.refcount));
 
 	/* Dead Node: xxx */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Dead Node: %d\n", dlm->reco.dead_node);
 
 	/* What about DLM_RECO_STATE_FINALIZE? */
@@ -867,19 +821,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 		state = "INACTIVE";
 
 	/* Recovery Pid: xxxx  Master: xxx  State: xxxx */
-	out += snprintf(db->buf + out, db->len - out,
+	out += snprintf(buf + out, len - out,
 			"Recovery Pid: %d  Master: %d  State: %s\n",
 			dlm->dlm_reco_thread_task->pid,
 			dlm->reco.new_master, state);
 
 	/* Recovery Map: xx xx */
-	out += snprintf(db->buf + out, db->len - out, "Recovery Map: ");
+	out += snprintf(buf + out, len - out, "Recovery Map: ");
 	out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
-				 db->buf + out, db->len - out);
-	out += snprintf(db->buf + out, db->len - out, "\n");
+				 buf + out, len - out);
+	out += snprintf(buf + out, len - out, "\n");
 
 	/* Recovery Node State: */
-	out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n");
+	out += snprintf(buf + out, len - out, "Recovery Node State:\n");
 	list_for_each_entry(node, &dlm->reco.node_data, list) {
 		switch (node->state) {
 		case DLM_RECO_NODE_DATA_INIT:
@@ -907,7 +861,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 			state = "BAD";
 			break;
 		}
-		out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n",
+		out += snprintf(buf + out, len - out, "\t%u - %s\n",
 				node->node_num, state);
 	}
 
@@ -919,15 +873,15 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
 static int debug_state_open(struct inode *inode, struct file *file)
 {
 	struct dlm_ctxt *dlm = inode->i_private;
-	struct debug_buffer *db = NULL;
+	char *buf = NULL;
 
-	db = debug_buffer_allocate();
-	if (!db)
+	buf = (char *) get_zeroed_page(GFP_NOFS);
+	if (!buf)
 		goto bail;
 
-	db->len = debug_state_print(dlm, db);
+	i_size_write(inode, debug_state_print(dlm, buf, PAGE_SIZE - 1));
 
-	file->private_data = db;
+	file->private_data = buf;
 
 	return 0;
 bail:
@@ -936,9 +890,9 @@ bail:
 
 static const struct file_operations debug_state_fops = {
 	.open =		debug_state_open,
-	.release =	debug_buffer_release,
-	.read =		debug_buffer_read,
-	.llseek =	debug_buffer_llseek,
+	.release =	debug_release,
+	.read =		debug_read,
+	.llseek =	generic_file_llseek,
 };
 /* end  - debug state funcs */
 
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index 8c686d22f9c7..1f27c4812d1a 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -37,11 +37,6 @@ struct dlm_debug_ctxt {
 	struct dentry *debug_purgelist_dentry;
 };
 
-struct debug_buffer {
-	int len;
-	char *buf;
-};
-
 struct debug_lockres {
 	int dl_len;
 	char *dl_buf;
-- 
cgit v1.2.3


From 37096a7927decb0b1d3c2514b8adb4583a834112 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 20 Dec 2010 16:35:00 -0800
Subject: ocfs2/dlm: Minor cleanup

Patch makes use of task_pid_nr(). Also removes the null check before calling
debugfs_remove().

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/netdebug.c | 20 +++++++-------------
 fs/ocfs2/dlm/dlmdebug.c     | 22 ++++++++--------------
 2 files changed, 15 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index a3f150e52b02..0edf836b42d5 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -141,7 +141,7 @@ static int nst_seq_show(struct seq_file *seq, void *v)
 			   "  sock acquiry: %lu.%ld\n"
 			   "  send start:   %lu.%ld\n"
 			   "  wait start:   %lu.%ld\n",
-			   nst, (unsigned long)nst->st_task->pid,
+			   nst, (unsigned long)task_pid_nr(nst->st_task),
 			   (unsigned long)nst->st_task->tgid,
 			   nst->st_task->comm, nst->st_node,
 			   nst->st_sc, nst->st_id, nst->st_msg_type,
@@ -421,23 +421,17 @@ int o2net_debugfs_init(void)
 
 	return 0;
 bail:
-	if (sc_dentry)
-		debugfs_remove(sc_dentry);
-	if (nst_dentry)
-		debugfs_remove(nst_dentry);
-	if (o2net_dentry)
-		debugfs_remove(o2net_dentry);
+	debugfs_remove(sc_dentry);
+	debugfs_remove(nst_dentry);
+	debugfs_remove(o2net_dentry);
 	return -ENOMEM;
 }
 
 void o2net_debugfs_exit(void)
 {
-	if (sc_dentry)
-		debugfs_remove(sc_dentry);
-	if (nst_dentry)
-		debugfs_remove(nst_dentry);
-	if (o2net_dentry)
-		debugfs_remove(o2net_dentry);
+	debugfs_remove(sc_dentry);
+	debugfs_remove(nst_dentry);
+	debugfs_remove(o2net_dentry);
 }
 
 #endif	/* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 77199ca4409a..04a32be0aeb9 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -743,7 +743,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
 	/* Thread Pid: xxx  Node: xxx  State: xxxxx */
 	out += snprintf(buf + out, len - out,
 			"Thread Pid: %d  Node: %d  State: %s\n",
-			dlm->dlm_thread_task->pid, dlm->node_num, state);
+			task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state);
 
 	/* Number of Joins: xxx  Joining Node: xxx */
 	out += snprintf(buf + out, len - out,
@@ -823,7 +823,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
 	/* Recovery Pid: xxxx  Master: xxx  State: xxxx */
 	out += snprintf(buf + out, len - out,
 			"Recovery Pid: %d  Master: %d  State: %s\n",
-			dlm->dlm_reco_thread_task->pid,
+			task_pid_nr(dlm->dlm_reco_thread_task),
 			dlm->reco.new_master, state);
 
 	/* Recovery Map: xx xx */
@@ -956,14 +956,10 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
 	struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
 
 	if (dc) {
-		if (dc->debug_purgelist_dentry)
-			debugfs_remove(dc->debug_purgelist_dentry);
-		if (dc->debug_mle_dentry)
-			debugfs_remove(dc->debug_mle_dentry);
-		if (dc->debug_lockres_dentry)
-			debugfs_remove(dc->debug_lockres_dentry);
-		if (dc->debug_state_dentry)
-			debugfs_remove(dc->debug_state_dentry);
+		debugfs_remove(dc->debug_purgelist_dentry);
+		debugfs_remove(dc->debug_mle_dentry);
+		debugfs_remove(dc->debug_lockres_dentry);
+		debugfs_remove(dc->debug_state_dentry);
 		dlm_debug_put(dc);
 	}
 }
@@ -994,8 +990,7 @@ bail:
 
 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 {
-	if (dlm->dlm_debugfs_subroot)
-		debugfs_remove(dlm->dlm_debugfs_subroot);
+	debugfs_remove(dlm->dlm_debugfs_subroot);
 }
 
 /* debugfs root */
@@ -1011,7 +1006,6 @@ int dlm_create_debugfs_root(void)
 
 void dlm_destroy_debugfs_root(void)
 {
-	if (dlm_debugfs_root)
-		debugfs_remove(dlm_debugfs_root);
+	debugfs_remove(dlm_debugfs_root);
 }
 #endif	/* CONFIG_DEBUG_FS */
-- 
cgit v1.2.3


From 079ffb743c622fe2189b75614427c56e8391498b Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 20 Dec 2010 16:35:01 -0800
Subject: ocfs2/dlm: Hard code the values for enums

In o2dlm, the enumerated message values are part of the protocol.
The patch hard codes each value so as to reduce the chance of an editing
error causing a protocol mismatch.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h | 86 ++++++++++++++++++++++++------------------------
 1 file changed, 43 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index b36d0bf77a5a..4bdf7baee344 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -50,10 +50,10 @@
 #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
 
 enum dlm_mle_type {
-	DLM_MLE_BLOCK,
-	DLM_MLE_MASTER,
-	DLM_MLE_MIGRATION,
-	DLM_MLE_NUM_TYPES
+	DLM_MLE_BLOCK = 0,
+	DLM_MLE_MASTER = 1,
+	DLM_MLE_MIGRATION = 2,
+	DLM_MLE_NUM_TYPES = 3,
 };
 
 struct dlm_master_list_entry {
@@ -82,8 +82,8 @@ struct dlm_master_list_entry {
 
 enum dlm_ast_type {
 	DLM_AST = 0,
-	DLM_BAST,
-	DLM_ASTUNLOCK
+	DLM_BAST = 1,
+	DLM_ASTUNLOCK = 2,
 };
 
 
@@ -119,9 +119,9 @@ struct dlm_recovery_ctxt
 
 enum dlm_ctxt_state {
 	DLM_CTXT_NEW = 0,
-	DLM_CTXT_JOINED,
-	DLM_CTXT_IN_SHUTDOWN,
-	DLM_CTXT_LEAVING,
+	DLM_CTXT_JOINED = 1,
+	DLM_CTXT_IN_SHUTDOWN = 2,
+	DLM_CTXT_LEAVING = 3,
 };
 
 struct dlm_ctxt
@@ -388,8 +388,8 @@ struct dlm_lock
 
 enum dlm_lockres_list {
 	DLM_GRANTED_LIST = 0,
-	DLM_CONVERTING_LIST,
-	DLM_BLOCKED_LIST
+	DLM_CONVERTING_LIST = 1,
+	DLM_BLOCKED_LIST = 2,
 };
 
 static inline int dlm_lvb_is_empty(char *lvb)
@@ -427,27 +427,27 @@ struct dlm_node_iter
 
 
 enum {
-	DLM_MASTER_REQUEST_MSG    = 500,
-	DLM_UNUSED_MSG1,         /* 501 */
-	DLM_ASSERT_MASTER_MSG,	 /* 502 */
-	DLM_CREATE_LOCK_MSG,	 /* 503 */
-	DLM_CONVERT_LOCK_MSG,	 /* 504 */
-	DLM_PROXY_AST_MSG,	 /* 505 */
-	DLM_UNLOCK_LOCK_MSG,	 /* 506 */
-	DLM_DEREF_LOCKRES_MSG,	 /* 507 */
-	DLM_MIGRATE_REQUEST_MSG, /* 508 */
-	DLM_MIG_LOCKRES_MSG, 	 /* 509 */
-	DLM_QUERY_JOIN_MSG,	 /* 510 */
-	DLM_ASSERT_JOINED_MSG,	 /* 511 */
-	DLM_CANCEL_JOIN_MSG,	 /* 512 */
-	DLM_EXIT_DOMAIN_MSG,	 /* 513 */
-	DLM_MASTER_REQUERY_MSG,	 /* 514 */
-	DLM_LOCK_REQUEST_MSG,	 /* 515 */
-	DLM_RECO_DATA_DONE_MSG,	 /* 516 */
-	DLM_BEGIN_RECO_MSG,	 /* 517 */
-	DLM_FINALIZE_RECO_MSG,	 /* 518 */
-	DLM_QUERY_REGION,	 /* 519 */
-	DLM_QUERY_NODEINFO,	 /* 520 */
+	DLM_MASTER_REQUEST_MSG		= 500,
+	DLM_UNUSED_MSG1			= 501,
+	DLM_ASSERT_MASTER_MSG		= 502,
+	DLM_CREATE_LOCK_MSG		= 503,
+	DLM_CONVERT_LOCK_MSG		= 504,
+	DLM_PROXY_AST_MSG		= 505,
+	DLM_UNLOCK_LOCK_MSG		= 506,
+	DLM_DEREF_LOCKRES_MSG		= 507,
+	DLM_MIGRATE_REQUEST_MSG		= 508,
+	DLM_MIG_LOCKRES_MSG		= 509,
+	DLM_QUERY_JOIN_MSG		= 510,
+	DLM_ASSERT_JOINED_MSG		= 511,
+	DLM_CANCEL_JOIN_MSG		= 512,
+	DLM_EXIT_DOMAIN_MSG		= 513,
+	DLM_MASTER_REQUERY_MSG		= 514,
+	DLM_LOCK_REQUEST_MSG		= 515,
+	DLM_RECO_DATA_DONE_MSG		= 516,
+	DLM_BEGIN_RECO_MSG		= 517,
+	DLM_FINALIZE_RECO_MSG		= 518,
+	DLM_QUERY_REGION		= 519,
+	DLM_QUERY_NODEINFO		= 520,
 };
 
 struct dlm_reco_node_data
@@ -460,19 +460,19 @@ struct dlm_reco_node_data
 enum {
 	DLM_RECO_NODE_DATA_DEAD = -1,
 	DLM_RECO_NODE_DATA_INIT = 0,
-	DLM_RECO_NODE_DATA_REQUESTING,
-	DLM_RECO_NODE_DATA_REQUESTED,
-	DLM_RECO_NODE_DATA_RECEIVING,
-	DLM_RECO_NODE_DATA_DONE,
-	DLM_RECO_NODE_DATA_FINALIZE_SENT,
+	DLM_RECO_NODE_DATA_REQUESTING = 1,
+	DLM_RECO_NODE_DATA_REQUESTED = 2,
+	DLM_RECO_NODE_DATA_RECEIVING = 3,
+	DLM_RECO_NODE_DATA_DONE = 4,
+	DLM_RECO_NODE_DATA_FINALIZE_SENT = 5,
 };
 
 
 enum {
 	DLM_MASTER_RESP_NO = 0,
-	DLM_MASTER_RESP_YES,
-	DLM_MASTER_RESP_MAYBE,
-	DLM_MASTER_RESP_ERROR
+	DLM_MASTER_RESP_YES = 1,
+	DLM_MASTER_RESP_MAYBE = 2,
+	DLM_MASTER_RESP_ERROR = 3,
 };
 
 
@@ -649,9 +649,9 @@ struct dlm_proxy_ast
 #define DLM_MOD_KEY (0x666c6172)
 enum dlm_query_join_response_code {
 	JOIN_DISALLOW = 0,
-	JOIN_OK,
-	JOIN_OK_NO_MAP,
-	JOIN_PROTOCOL_MISMATCH,
+	JOIN_OK = 1,
+	JOIN_OK_NO_MAP = 2,
+	JOIN_PROTOCOL_MISMATCH = 3,
 };
 
 struct dlm_query_join_packet {
-- 
cgit v1.2.3


From 8757241e32a295a2aa836e8f8b32912204d11fda Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 22 Dec 2010 12:39:37 -0800
Subject: ocfs2: Add DEBUG_FS dependency

Make OCFS2_FS_STATS depend on DEBUG_FS.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 0d840669698e..ab152c00cd3a 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -51,7 +51,7 @@ config OCFS2_FS_USERSPACE_CLUSTER
 
 config OCFS2_FS_STATS
 	bool "OCFS2 statistics"
-	depends on OCFS2_FS
+	depends on OCFS2_FS && DEBUG_FS
 	default y
 	help
 	  This option allows some fs statistics to be captured. Enabling
-- 
cgit v1.2.3


From 3f9c14fab0a2e90af9995f261a123f59e0b41141 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 22 Dec 2010 12:39:38 -0800
Subject: ocfs2/cluster: Replace timeval with ktime in struct
 o2net_send_tracking

Replace time trackers in struct o2net_send_tracking from struct timeval to
union ktime.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/netdebug.c     | 22 +++++++++++++---------
 fs/ocfs2/cluster/tcp.c          |  6 +++---
 fs/ocfs2/cluster/tcp_internal.h |  6 +++---
 3 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 0edf836b42d5..2b986aa82299 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -123,10 +123,17 @@ static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static int nst_seq_show(struct seq_file *seq, void *v)
 {
 	struct o2net_send_tracking *nst, *dummy_nst = seq->private;
+	ktime_t now;
+	s64 sock, send, status;
 
 	spin_lock(&o2net_debug_lock);
 	nst = next_nst(dummy_nst);
 
+	now = ktime_get();
+	sock = ktime_to_us(ktime_sub(now, nst->st_sock_time));
+	send = ktime_to_us(ktime_sub(now, nst->st_send_time));
+	status = ktime_to_us(ktime_sub(now, nst->st_status_time));
+
 	if (nst != NULL) {
 		/* get_task_comm isn't exported.  oh well. */
 		seq_printf(seq, "%p:\n"
@@ -138,20 +145,17 @@ static int nst_seq_show(struct seq_file *seq, void *v)
 			   "  message id:   %d\n"
 			   "  message type: %u\n"
 			   "  message key:  0x%08x\n"
-			   "  sock acquiry: %lu.%ld\n"
-			   "  send start:   %lu.%ld\n"
-			   "  wait start:   %lu.%ld\n",
+			   "  sock acquiry: %lld usecs ago\n"
+			   "  send start:   %lld usecs ago\n"
+			   "  wait start:   %lld usecs ago\n",
 			   nst, (unsigned long)task_pid_nr(nst->st_task),
 			   (unsigned long)nst->st_task->tgid,
 			   nst->st_task->comm, nst->st_node,
 			   nst->st_sc, nst->st_id, nst->st_msg_type,
 			   nst->st_msg_key,
-			   nst->st_sock_time.tv_sec,
-			   (long)nst->st_sock_time.tv_usec,
-			   nst->st_send_time.tv_sec,
-			   (long)nst->st_send_time.tv_usec,
-			   nst->st_status_time.tv_sec,
-			   (long)nst->st_status_time.tv_usec);
+			   (long long)sock,
+			   (long long)send,
+			   (long long)status);
 	}
 
 	spin_unlock(&o2net_debug_lock);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 92de96cd247d..49c1a95e352e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -155,17 +155,17 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
 
 static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
 {
-	do_gettimeofday(&nst->st_sock_time);
+	nst->st_sock_time = ktime_get();
 }
 
 static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
 {
-	do_gettimeofday(&nst->st_send_time);
+	nst->st_send_time = ktime_get();
 }
 
 static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
 {
-	do_gettimeofday(&nst->st_status_time);
+	nst->st_status_time = ktime_get();
 }
 
 static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 15fdbdf9eb4b..b613aaaf3e5c 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -220,9 +220,9 @@ struct o2net_send_tracking {
 	u32				st_msg_type;
 	u32				st_msg_key;
 	u8				st_node;
-	struct timeval			st_sock_time;
-	struct timeval			st_send_time;
-	struct timeval			st_status_time;
+	ktime_t				st_sock_time;
+	ktime_t				st_send_time;
+	ktime_t				st_status_time;
 };
 #else
 struct o2net_send_tracking {
-- 
cgit v1.2.3


From ff1becbf85bf4d4d4652915b7ab27db949585f6b Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 22 Dec 2010 12:39:39 -0800
Subject: ocfs2/cluster: Use ktime instead of timeval in struct
 o2net_sock_container

Replace time trackers in struct o2net_sock_container from struct timeval to
union ktime.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/netdebug.c     | 28 ++++++-------
 fs/ocfs2/cluster/tcp.c          | 91 ++++++++++++++++++++++++-----------------
 fs/ocfs2/cluster/tcp_internal.h | 19 +++++----
 3 files changed, 76 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 2b986aa82299..536a93d13a06 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -280,8 +280,6 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	return sc; /* unused, just needs to be null when done */
 }
 
-#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
-
 static int sc_seq_show(struct seq_file *seq, void *v)
 {
 	struct o2net_sock_container *sc, *dummy_sc = seq->private;
@@ -313,13 +311,13 @@ static int sc_seq_show(struct seq_file *seq, void *v)
 			   "  remote node:     %s\n"
 			   "  page off:        %zu\n"
 			   "  handshake ok:    %u\n"
-			   "  timer:           %lu.%ld\n"
-			   "  data ready:      %lu.%ld\n"
-			   "  advance start:   %lu.%ld\n"
-			   "  advance stop:    %lu.%ld\n"
-			   "  func start:      %lu.%ld\n"
-			   "  func stop:       %lu.%ld\n"
-			   "  func key:        %u\n"
+			   "  timer:           %lld usecs\n"
+			   "  data ready:      %lld usecs\n"
+			   "  advance start:   %lld usecs\n"
+			   "  advance stop:    %lld usecs\n"
+			   "  func start:      %lld usecs\n"
+			   "  func stop:       %lld usecs\n"
+			   "  func key:        0x%08x\n"
 			   "  func type:       %u\n",
 			   sc,
 			   atomic_read(&sc->sc_kref.refcount),
@@ -328,12 +326,12 @@ static int sc_seq_show(struct seq_file *seq, void *v)
 			   sc->sc_node->nd_name,
 			   sc->sc_page_off,
 			   sc->sc_handshake_ok,
-			   TV_SEC_USEC(sc->sc_tv_timer),
-			   TV_SEC_USEC(sc->sc_tv_data_ready),
-			   TV_SEC_USEC(sc->sc_tv_advance_start),
-			   TV_SEC_USEC(sc->sc_tv_advance_stop),
-			   TV_SEC_USEC(sc->sc_tv_func_start),
-			   TV_SEC_USEC(sc->sc_tv_func_stop),
+			   (long long)ktime_to_us(sc->sc_tv_timer),
+			   (long long)ktime_to_us(sc->sc_tv_data_ready),
+			   (long long)ktime_to_us(sc->sc_tv_advance_start),
+			   (long long)ktime_to_us(sc->sc_tv_advance_stop),
+			   (long long)ktime_to_us(sc->sc_tv_func_start),
+			   (long long)ktime_to_us(sc->sc_tv_func_stop),
 			   sc->sc_msg_key,
 			   sc->sc_msg_type);
 	}
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 49c1a95e352e..efd848334052 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -153,61 +153,75 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
 	nst->st_node = node;
 }
 
-static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
 {
 	nst->st_sock_time = ktime_get();
 }
 
-static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
 {
 	nst->st_send_time = ktime_get();
 }
 
-static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
 {
 	nst->st_status_time = ktime_get();
 }
 
-static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
-					 struct o2net_sock_container *sc)
+static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+						struct o2net_sock_container *sc)
 {
 	nst->st_sc = sc;
 }
 
-static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
+static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
+					u32 msg_id)
 {
 	nst->st_id = msg_id;
 }
 
-#else  /* CONFIG_DEBUG_FS */
-
-static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
-				  u32 msgkey, struct task_struct *task, u8 node)
+static inline void o2net_set_sock_timer(struct o2net_sock_container *sc)
 {
+	sc->sc_tv_timer = ktime_get();
 }
 
-static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc)
 {
+	sc->sc_tv_data_ready = ktime_get();
 }
 
-static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc)
 {
+	sc->sc_tv_advance_start = ktime_get();
 }
 
-static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc)
 {
+	sc->sc_tv_advance_stop = ktime_get();
 }
 
-static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
-						struct o2net_sock_container *sc)
+static inline void o2net_set_func_start_time(struct o2net_sock_container *sc)
 {
+	sc->sc_tv_func_start = ktime_get();
 }
 
-static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
-					u32 msg_id)
+static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
 {
+	sc->sc_tv_func_stop = ktime_get();
 }
-
+#else  /* CONFIG_DEBUG_FS */
+# define o2net_init_nst(a, b, c, d, e)
+# define o2net_set_nst_sock_time(a)
+# define o2net_set_nst_send_time(a)
+# define o2net_set_nst_status_time(a)
+# define o2net_set_nst_sock_container(a, b)
+# define o2net_set_nst_msg_id(a, b)
+# define o2net_set_sock_timer(a)
+# define o2net_set_data_ready_time(a)
+# define o2net_set_advance_start_time(a)
+# define o2net_set_advance_stop_time(a)
+# define o2net_set_func_start_time(a)
+# define o2net_set_func_stop_time(a)
 #endif /* CONFIG_DEBUG_FS */
 
 static inline int o2net_reconnect_delay(void)
@@ -555,7 +569,7 @@ static void o2net_data_ready(struct sock *sk, int bytes)
 	if (sk->sk_user_data) {
 		struct o2net_sock_container *sc = sk->sk_user_data;
 		sclog(sc, "data_ready hit\n");
-		do_gettimeofday(&sc->sc_tv_data_ready);
+		o2net_set_data_ready_time(sc);
 		o2net_sc_queue_work(sc, &sc->sc_rx_work);
 		ready = sc->sc_data_ready;
 	} else {
@@ -1192,13 +1206,13 @@ static int o2net_process_message(struct o2net_sock_container *sc,
 	if (syserr != O2NET_ERR_NONE)
 		goto out_respond;
 
-	do_gettimeofday(&sc->sc_tv_func_start);
+	o2net_set_func_start_time(sc);
 	sc->sc_msg_key = be32_to_cpu(hdr->key);
 	sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
 	handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
 					     be16_to_cpu(hdr->data_len),
 					nmh->nh_func_data, &ret_data);
-	do_gettimeofday(&sc->sc_tv_func_stop);
+	o2net_set_func_stop_time(sc);
 
 out_respond:
 	/* this destroys the hdr, so don't use it after this */
@@ -1309,7 +1323,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
 	size_t datalen;
 
 	sclog(sc, "receiving\n");
-	do_gettimeofday(&sc->sc_tv_advance_start);
+	o2net_set_advance_start_time(sc);
 
 	if (unlikely(sc->sc_handshake_ok == 0)) {
 		if(sc->sc_page_off < sizeof(struct o2net_handshake)) {
@@ -1384,7 +1398,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc)
 
 out:
 	sclog(sc, "ret = %d\n", ret);
-	do_gettimeofday(&sc->sc_tv_advance_stop);
+	o2net_set_advance_stop_time(sc);
 	return ret;
 }
 
@@ -1484,27 +1498,28 @@ static void o2net_idle_timer(unsigned long data)
 {
 	struct o2net_sock_container *sc = (struct o2net_sock_container *)data;
 	struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
-	struct timeval now;
 
-	do_gettimeofday(&now);
+#ifdef CONFIG_DEBUG_FS
+	ktime_t now = ktime_get();
+#endif
 
 	printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
 	     "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
 		     o2net_idle_timeout() / 1000,
 		     o2net_idle_timeout() % 1000);
-	mlog(ML_NOTICE, "here are some times that might help debug the "
-	     "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
-	     "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
-	     sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
-	     now.tv_sec, (long) now.tv_usec,
-	     sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
-	     sc->sc_tv_advance_start.tv_sec,
-	     (long) sc->sc_tv_advance_start.tv_usec,
-	     sc->sc_tv_advance_stop.tv_sec,
-	     (long) sc->sc_tv_advance_stop.tv_usec,
+
+#ifdef CONFIG_DEBUG_FS
+	mlog(ML_NOTICE, "Here are some times that might help debug the "
+	     "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, "
+	     "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n",
+	     (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now),
+	     (long long)ktime_to_us(sc->sc_tv_data_ready),
+	     (long long)ktime_to_us(sc->sc_tv_advance_start),
+	     (long long)ktime_to_us(sc->sc_tv_advance_stop),
 	     sc->sc_msg_key, sc->sc_msg_type,
-	     sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
-	     sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
+	     (long long)ktime_to_us(sc->sc_tv_func_start),
+	     (long long)ktime_to_us(sc->sc_tv_func_stop));
+#endif
 
 	/*
 	 * Initialize the nn_timeout so that the next connection attempt
@@ -1520,7 +1535,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
 	o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
 	o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
 		      msecs_to_jiffies(o2net_keepalive_delay()));
-	do_gettimeofday(&sc->sc_tv_timer);
+	o2net_set_sock_timer(sc);
 	mod_timer(&sc->sc_idle_timeout,
 	       jiffies + msecs_to_jiffies(o2net_idle_timeout()));
 }
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index b613aaaf3e5c..f81576333911 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -166,18 +166,19 @@ struct o2net_sock_container {
 	/* original handlers for the sockets */
 	void			(*sc_state_change)(struct sock *sk);
 	void			(*sc_data_ready)(struct sock *sk, int bytes);
-#ifdef CONFIG_DEBUG_FS
-	struct list_head        sc_net_debug_item;
-#endif
-	struct timeval 		sc_tv_timer;
-	struct timeval 		sc_tv_data_ready;
-	struct timeval 		sc_tv_advance_start;
-	struct timeval 		sc_tv_advance_stop;
-	struct timeval 		sc_tv_func_start;
-	struct timeval 		sc_tv_func_stop;
+
 	u32			sc_msg_key;
 	u16			sc_msg_type;
 
+#ifdef CONFIG_DEBUG_FS
+	struct list_head        sc_net_debug_item;
+	ktime_t			sc_tv_timer;
+	ktime_t			sc_tv_data_ready;
+	ktime_t			sc_tv_advance_start;
+	ktime_t			sc_tv_advance_stop;
+	ktime_t			sc_tv_func_start;
+	ktime_t			sc_tv_func_stop;
+#endif
 	struct mutex		sc_send_lock;
 };
 
-- 
cgit v1.2.3


From 3c193b3807e933cf2a16d55a38debbe549195847 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 22 Dec 2010 12:39:40 -0800
Subject: ocfs2/cluster: Track send message timing stats for each socket

Tracks total send and status times for all messages sent on a socket.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/tcp.c          | 24 ++++++++++++++++++++++++
 fs/ocfs2/cluster/tcp_internal.h |  6 ++++++
 2 files changed, 30 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index efd848334052..4d61e19d6146 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -224,6 +224,28 @@ static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
 # define o2net_set_func_stop_time(a)
 #endif /* CONFIG_DEBUG_FS */
 
+#ifdef CONFIG_OCFS2_FS_STATS
+static void o2net_update_send_stats(struct o2net_send_tracking *nst,
+				    struct o2net_sock_container *sc)
+{
+	sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total,
+					   ktime_sub(ktime_get(),
+						     nst->st_status_time));
+	sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total,
+					 ktime_sub(nst->st_status_time,
+						   nst->st_send_time));
+	sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total,
+					    ktime_sub(nst->st_send_time,
+						      nst->st_sock_time));
+	sc->sc_send_count++;
+}
+
+#else
+
+# define o2net_update_send_stats(a, b)
+
+#endif /* CONFIG_OCFS2_FS_STATS */
+
 static inline int o2net_reconnect_delay(void)
 {
 	return o2nm_single_cluster->cl_reconnect_delay_ms;
@@ -1093,6 +1115,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
 	o2net_set_nst_status_time(&nst);
 	wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw));
 
+	o2net_update_send_stats(&nst, sc);
+
 	/* Note that we avoid overwriting the callers status return
 	 * variable if a system error was reported on the other
 	 * side. Callers beware. */
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index f81576333911..640c6fcef720 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -178,6 +178,12 @@ struct o2net_sock_container {
 	ktime_t			sc_tv_advance_stop;
 	ktime_t			sc_tv_func_start;
 	ktime_t			sc_tv_func_stop;
+#endif
+#ifdef CONFIG_OCFS2_FS_STATS
+	ktime_t			sc_tv_acquiry_total;
+	ktime_t			sc_tv_send_total;
+	ktime_t			sc_tv_status_total;
+	u32			sc_send_count;
 #endif
 	struct mutex		sc_send_lock;
 };
-- 
cgit v1.2.3


From e453039f8bf44abf82f3ecfb34177e0cb04bce12 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 22 Dec 2010 12:39:41 -0800
Subject: ocfs2/cluster: Track process message timing stats for each socket

Tracks total time taken to process messages received on a socket.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/tcp.c          | 15 +++++++++++++++
 fs/ocfs2/cluster/tcp_internal.h |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 4d61e19d6146..bc2309554d0b 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -209,6 +209,11 @@ static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
 {
 	sc->sc_tv_func_stop = ktime_get();
 }
+
+static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc)
+{
+	return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start);
+}
 #else  /* CONFIG_DEBUG_FS */
 # define o2net_init_nst(a, b, c, d, e)
 # define o2net_set_nst_sock_time(a)
@@ -222,6 +227,7 @@ static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc)
 # define o2net_set_advance_stop_time(a)
 # define o2net_set_func_start_time(a)
 # define o2net_set_func_stop_time(a)
+# define o2net_get_func_run_time(a)		(ktime_t)0
 #endif /* CONFIG_DEBUG_FS */
 
 #ifdef CONFIG_OCFS2_FS_STATS
@@ -240,6 +246,13 @@ static void o2net_update_send_stats(struct o2net_send_tracking *nst,
 	sc->sc_send_count++;
 }
 
+static void o2net_update_recv_stats(struct o2net_sock_container *sc)
+{
+	sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total,
+					    o2net_get_func_run_time(sc));
+	sc->sc_recv_count++;
+}
+
 #else
 
 # define o2net_update_send_stats(a, b)
@@ -1238,6 +1251,8 @@ static int o2net_process_message(struct o2net_sock_container *sc,
 					nmh->nh_func_data, &ret_data);
 	o2net_set_func_stop_time(sc);
 
+	o2net_update_recv_stats(sc);
+
 out_respond:
 	/* this destroys the hdr, so don't use it after this */
 	mutex_lock(&sc->sc_send_lock);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 640c6fcef720..4cbcb65784a3 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -184,6 +184,8 @@ struct o2net_sock_container {
 	ktime_t			sc_tv_send_total;
 	ktime_t			sc_tv_status_total;
 	u32			sc_send_count;
+	u32			sc_recv_count;
+	ktime_t			sc_tv_process_total;
 #endif
 	struct mutex		sc_send_lock;
 };
-- 
cgit v1.2.3


From db02754c8a1205b24beac70562c45ca5d671151f Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Wed, 22 Dec 2010 12:39:42 -0800
Subject: ocfs2/cluster: Show o2net timing statistics

Adds debugfs dentry o2net/stats to show the o2net timing statistics.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/netdebug.c | 211 ++++++++++++++++++++++++++++++++------------
 1 file changed, 157 insertions(+), 54 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 536a93d13a06..61df89cedded 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -46,10 +46,15 @@
 #define O2NET_DEBUG_DIR		"o2net"
 #define SC_DEBUG_NAME		"sock_containers"
 #define NST_DEBUG_NAME		"send_tracking"
+#define STATS_DEBUG_NAME	"stats"
+
+#define SHOW_SOCK_CONTAINERS	0
+#define SHOW_SOCK_STATS		1
 
 static struct dentry *o2net_dentry;
 static struct dentry *sc_dentry;
 static struct dentry *nst_dentry;
+static struct dentry *stats_dentry;
 
 static DEFINE_SPINLOCK(o2net_debug_lock);
 
@@ -232,6 +237,11 @@ void o2net_debug_del_sc(struct o2net_sock_container *sc)
 	spin_unlock(&o2net_debug_lock);
 }
 
+struct o2net_sock_debug {
+	int dbg_ctxt;
+	struct o2net_sock_container *dbg_sock;
+};
+
 static struct o2net_sock_container
 			*next_sc(struct o2net_sock_container *sc_start)
 {
@@ -257,7 +267,8 @@ static struct o2net_sock_container
 
 static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct o2net_sock_container *sc, *dummy_sc = seq->private;
+	struct o2net_sock_debug *sd = seq->private;
+	struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
 
 	spin_lock(&o2net_debug_lock);
 	sc = next_sc(dummy_sc);
@@ -268,7 +279,8 @@ static void *sc_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct o2net_sock_container *sc, *dummy_sc = seq->private;
+	struct o2net_sock_debug *sd = seq->private;
+	struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
 
 	spin_lock(&o2net_debug_lock);
 	sc = next_sc(dummy_sc);
@@ -280,63 +292,107 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	return sc; /* unused, just needs to be null when done */
 }
 
-static int sc_seq_show(struct seq_file *seq, void *v)
+#ifdef CONFIG_OCFS2_FS_STATS
+# define sc_send_count(_s)		((_s)->sc_send_count)
+# define sc_recv_count(_s)		((_s)->sc_recv_count)
+# define sc_tv_acquiry_total_ns(_s)	(ktime_to_ns((_s)->sc_tv_acquiry_total))
+# define sc_tv_send_total_ns(_s)	(ktime_to_ns((_s)->sc_tv_send_total))
+# define sc_tv_status_total_ns(_s)	(ktime_to_ns((_s)->sc_tv_status_total))
+# define sc_tv_process_total_ns(_s)	(ktime_to_ns((_s)->sc_tv_process_total))
+#else
+# define sc_send_count(_s)		(0U)
+# define sc_recv_count(_s)		(0U)
+# define sc_tv_acquiry_total_ns(_s)	(0LL)
+# define sc_tv_send_total_ns(_s)	(0LL)
+# define sc_tv_status_total_ns(_s)	(0LL)
+# define sc_tv_process_total_ns(_s)	(0LL)
+#endif
+
+/* So that debugfs.ocfs2 can determine which format is being used */
+#define O2NET_STATS_STR_VERSION		1
+static void sc_show_sock_stats(struct seq_file *seq,
+			       struct o2net_sock_container *sc)
 {
-	struct o2net_sock_container *sc, *dummy_sc = seq->private;
+	if (!sc)
+		return;
+
+	seq_printf(seq, "%d,%u,%lu,%lld,%lld,%lld,%lu,%lld\n", O2NET_STATS_STR_VERSION,
+		   sc->sc_node->nd_num, (unsigned long)sc_send_count(sc),
+		   (long long)sc_tv_acquiry_total_ns(sc),
+		   (long long)sc_tv_send_total_ns(sc),
+		   (long long)sc_tv_status_total_ns(sc),
+		   (unsigned long)sc_recv_count(sc),
+		   (long long)sc_tv_process_total_ns(sc));
+}
 
-	spin_lock(&o2net_debug_lock);
-	sc = next_sc(dummy_sc);
+static void sc_show_sock_container(struct seq_file *seq,
+				   struct o2net_sock_container *sc)
+{
+	struct inet_sock *inet = NULL;
+	__be32 saddr = 0, daddr = 0;
+	__be16 sport = 0, dport = 0;
+
+	if (!sc)
+		return;
+
+	if (sc->sc_sock) {
+		inet = inet_sk(sc->sc_sock->sk);
+		/* the stack's structs aren't sparse endian clean */
+		saddr = (__force __be32)inet->inet_saddr;
+		daddr = (__force __be32)inet->inet_daddr;
+		sport = (__force __be16)inet->inet_sport;
+		dport = (__force __be16)inet->inet_dport;
+	}
 
-	if (sc != NULL) {
-		struct inet_sock *inet = NULL;
+	/* XXX sigh, inet-> doesn't have sparse annotation so any
+	 * use of it here generates a warning with -Wbitwise */
+	seq_printf(seq, "%p:\n"
+		   "  krefs:           %d\n"
+		   "  sock:            %pI4:%u -> "
+				      "%pI4:%u\n"
+		   "  remote node:     %s\n"
+		   "  page off:        %zu\n"
+		   "  handshake ok:    %u\n"
+		   "  timer:           %lld usecs\n"
+		   "  data ready:      %lld usecs\n"
+		   "  advance start:   %lld usecs\n"
+		   "  advance stop:    %lld usecs\n"
+		   "  func start:      %lld usecs\n"
+		   "  func stop:       %lld usecs\n"
+		   "  func key:        0x%08x\n"
+		   "  func type:       %u\n",
+		   sc,
+		   atomic_read(&sc->sc_kref.refcount),
+		   &saddr, inet ? ntohs(sport) : 0,
+		   &daddr, inet ? ntohs(dport) : 0,
+		   sc->sc_node->nd_name,
+		   sc->sc_page_off,
+		   sc->sc_handshake_ok,
+		   (long long)ktime_to_us(sc->sc_tv_timer),
+		   (long long)ktime_to_us(sc->sc_tv_data_ready),
+		   (long long)ktime_to_us(sc->sc_tv_advance_start),
+		   (long long)ktime_to_us(sc->sc_tv_advance_stop),
+		   (long long)ktime_to_us(sc->sc_tv_func_start),
+		   (long long)ktime_to_us(sc->sc_tv_func_stop),
+		   sc->sc_msg_key,
+		   sc->sc_msg_type);
+}
 
-		__be32 saddr = 0, daddr = 0;
-		__be16 sport = 0, dport = 0;
+static int sc_seq_show(struct seq_file *seq, void *v)
+{
+	struct o2net_sock_debug *sd = seq->private;
+	struct o2net_sock_container *sc, *dummy_sc = sd->dbg_sock;
 
-		if (sc->sc_sock) {
-			inet = inet_sk(sc->sc_sock->sk);
-			/* the stack's structs aren't sparse endian clean */
-			saddr = (__force __be32)inet->inet_saddr;
-			daddr = (__force __be32)inet->inet_daddr;
-			sport = (__force __be16)inet->inet_sport;
-			dport = (__force __be16)inet->inet_dport;
-		}
+	spin_lock(&o2net_debug_lock);
+	sc = next_sc(dummy_sc);
 
-		/* XXX sigh, inet-> doesn't have sparse annotation so any
-		 * use of it here generates a warning with -Wbitwise */
-		seq_printf(seq, "%p:\n"
-			   "  krefs:           %d\n"
-			   "  sock:            %pI4:%u -> "
-					      "%pI4:%u\n"
-			   "  remote node:     %s\n"
-			   "  page off:        %zu\n"
-			   "  handshake ok:    %u\n"
-			   "  timer:           %lld usecs\n"
-			   "  data ready:      %lld usecs\n"
-			   "  advance start:   %lld usecs\n"
-			   "  advance stop:    %lld usecs\n"
-			   "  func start:      %lld usecs\n"
-			   "  func stop:       %lld usecs\n"
-			   "  func key:        0x%08x\n"
-			   "  func type:       %u\n",
-			   sc,
-			   atomic_read(&sc->sc_kref.refcount),
-			   &saddr, inet ? ntohs(sport) : 0,
-			   &daddr, inet ? ntohs(dport) : 0,
-			   sc->sc_node->nd_name,
-			   sc->sc_page_off,
-			   sc->sc_handshake_ok,
-			   (long long)ktime_to_us(sc->sc_tv_timer),
-			   (long long)ktime_to_us(sc->sc_tv_data_ready),
-			   (long long)ktime_to_us(sc->sc_tv_advance_start),
-			   (long long)ktime_to_us(sc->sc_tv_advance_stop),
-			   (long long)ktime_to_us(sc->sc_tv_func_start),
-			   (long long)ktime_to_us(sc->sc_tv_func_stop),
-			   sc->sc_msg_key,
-			   sc->sc_msg_type);
+	if (sc) {
+		if (sd->dbg_ctxt == SHOW_SOCK_CONTAINERS)
+			sc_show_sock_container(seq, sc);
+		else
+			sc_show_sock_stats(seq, sc);
 	}
 
-
 	spin_unlock(&o2net_debug_lock);
 
 	return 0;
@@ -353,7 +409,7 @@ static const struct seq_operations sc_seq_ops = {
 	.show = sc_seq_show,
 };
 
-static int sc_fop_open(struct inode *inode, struct file *file)
+static int sc_common_open(struct file *file, struct o2net_sock_debug *sd)
 {
 	struct o2net_sock_container *dummy_sc;
 	struct seq_file *seq;
@@ -371,7 +427,8 @@ static int sc_fop_open(struct inode *inode, struct file *file)
 		goto out;
 
 	seq = file->private_data;
-	seq->private = dummy_sc;
+	seq->private = sd;
+	sd->dbg_sock = dummy_sc;
 	o2net_debug_add_sc(dummy_sc);
 
 	dummy_sc = NULL;
@@ -384,12 +441,48 @@ out:
 static int sc_fop_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq = file->private_data;
-	struct o2net_sock_container *dummy_sc = seq->private;
+	struct o2net_sock_debug *sd = seq->private;
+	struct o2net_sock_container *dummy_sc = sd->dbg_sock;
 
 	o2net_debug_del_sc(dummy_sc);
 	return seq_release_private(inode, file);
 }
 
+static int stats_fop_open(struct inode *inode, struct file *file)
+{
+	struct o2net_sock_debug *sd;
+
+	sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
+	if (sd == NULL)
+		return -ENOMEM;
+
+	sd->dbg_ctxt = SHOW_SOCK_STATS;
+	sd->dbg_sock = NULL;
+
+	return sc_common_open(file, sd);
+}
+
+static const struct file_operations stats_seq_fops = {
+	.open = stats_fop_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = sc_fop_release,
+};
+
+static int sc_fop_open(struct inode *inode, struct file *file)
+{
+	struct o2net_sock_debug *sd;
+
+	sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL);
+	if (sd == NULL)
+		return -ENOMEM;
+
+	sd->dbg_ctxt = SHOW_SOCK_CONTAINERS;
+	sd->dbg_sock = NULL;
+
+	return sc_common_open(file, sd);
+}
+
 static const struct file_operations sc_seq_fops = {
 	.open = sc_fop_open,
 	.read = seq_read,
@@ -421,8 +514,17 @@ int o2net_debugfs_init(void)
 		goto bail;
 	}
 
+	stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR,
+					   o2net_dentry, NULL,
+					   &stats_seq_fops);
+	if (!stats_dentry) {
+		mlog_errno(-ENOMEM);
+		goto bail;
+	}
+
 	return 0;
 bail:
+	debugfs_remove(stats_dentry);
 	debugfs_remove(sc_dentry);
 	debugfs_remove(nst_dentry);
 	debugfs_remove(o2net_dentry);
@@ -431,6 +533,7 @@ bail:
 
 void o2net_debugfs_exit(void)
 {
+	debugfs_remove(stats_dentry);
 	debugfs_remove(sc_dentry);
 	debugfs_remove(nst_dentry);
 	debugfs_remove(o2net_dentry);
-- 
cgit v1.2.3


From e70d84501b630e390b4242fb4ac629b694c11484 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 27 Dec 2010 11:55:08 -0800
Subject: ocfs2: fix build for OCFS2_FS_STATS not enabled

When CONFIG_OCFS2_FS_STATS is not enabled:

fs/ocfs2/cluster/tcp.c:1254: error: implicit declaration of function 'o2net_update_recv_stats'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc:	Mark Fasheh <mfasheh@suse.com>
Cc:	Joel Becker <joel.becker@oracle.com>
Cc:	ocfs2-devel@oss.oracle.com
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/tcp.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index bc2309554d0b..3b11cb1e38fc 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -257,6 +257,8 @@ static void o2net_update_recv_stats(struct o2net_sock_container *sc)
 
 # define o2net_update_send_stats(a, b)
 
+# define o2net_update_recv_stats(sc)
+
 #endif /* CONFIG_OCFS2_FS_STATS */
 
 static inline int o2net_reconnect_delay(void)
-- 
cgit v1.2.3


From cc548166b2aadba7a566fb0a42884a9d2ff684b0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 3 Jan 2011 09:00:20 +0300
Subject: ocfs2/cluster: dereferencing before checking in nst_seq_show()

In the original code, we dereferenced "nst" before checking that it was
non-NULL.  I moved the check forward and pulled the code in an indent
level.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/netdebug.c | 47 +++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 61df89cedded..3a5835904b3d 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -133,36 +133,37 @@ static int nst_seq_show(struct seq_file *seq, void *v)
 
 	spin_lock(&o2net_debug_lock);
 	nst = next_nst(dummy_nst);
+	if (!nst)
+		goto out;
 
 	now = ktime_get();
 	sock = ktime_to_us(ktime_sub(now, nst->st_sock_time));
 	send = ktime_to_us(ktime_sub(now, nst->st_send_time));
 	status = ktime_to_us(ktime_sub(now, nst->st_status_time));
 
-	if (nst != NULL) {
-		/* get_task_comm isn't exported.  oh well. */
-		seq_printf(seq, "%p:\n"
-			   "  pid:          %lu\n"
-			   "  tgid:         %lu\n"
-			   "  process name: %s\n"
-			   "  node:         %u\n"
-			   "  sc:           %p\n"
-			   "  message id:   %d\n"
-			   "  message type: %u\n"
-			   "  message key:  0x%08x\n"
-			   "  sock acquiry: %lld usecs ago\n"
-			   "  send start:   %lld usecs ago\n"
-			   "  wait start:   %lld usecs ago\n",
-			   nst, (unsigned long)task_pid_nr(nst->st_task),
-			   (unsigned long)nst->st_task->tgid,
-			   nst->st_task->comm, nst->st_node,
-			   nst->st_sc, nst->st_id, nst->st_msg_type,
-			   nst->st_msg_key,
-			   (long long)sock,
-			   (long long)send,
-			   (long long)status);
-	}
+	/* get_task_comm isn't exported.  oh well. */
+	seq_printf(seq, "%p:\n"
+		   "  pid:          %lu\n"
+		   "  tgid:         %lu\n"
+		   "  process name: %s\n"
+		   "  node:         %u\n"
+		   "  sc:           %p\n"
+		   "  message id:   %d\n"
+		   "  message type: %u\n"
+		   "  message key:  0x%08x\n"
+		   "  sock acquiry: %lld usecs ago\n"
+		   "  send start:   %lld usecs ago\n"
+		   "  wait start:   %lld usecs ago\n",
+		   nst, (unsigned long)task_pid_nr(nst->st_task),
+		   (unsigned long)nst->st_task->tgid,
+		   nst->st_task->comm, nst->st_node,
+		   nst->st_sc, nst->st_id, nst->st_msg_type,
+		   nst->st_msg_key,
+		   (long long)sock,
+		   (long long)send,
+		   (long long)status);
 
+out:
 	spin_unlock(&o2net_debug_lock);
 
 	return 0;
-- 
cgit v1.2.3


From aecf58661961a553c254cf14536f70349127affb Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 23 Dec 2010 15:30:44 +0800
Subject: ocfs2: Remove unused truncate function from alloc.c

Tristan Ye has done some refactoring against our truncate
process, so some functions like ocfs2_prepare_truncate and
ocfs2_free_truncate_context are no use and we'd better
remove them.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/alloc.c | 74 --------------------------------------------------------
 fs/ocfs2/alloc.h |  4 ---
 2 files changed, 78 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 8ec418dd9e36..e4984e259cb6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -565,7 +565,6 @@ static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
 	return ret;
 }
 
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
 static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
 					 struct ocfs2_extent_block *eb);
 static void ocfs2_adjust_rightmost_records(handle_t *handle,
@@ -7141,64 +7140,6 @@ bail:
 	return status;
 }
 
-/*
- * Expects the inode to already be locked.
- */
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
-			   struct inode *inode,
-			   struct buffer_head *fe_bh,
-			   struct ocfs2_truncate_context **tc)
-{
-	int status;
-	unsigned int new_i_clusters;
-	struct ocfs2_dinode *fe;
-	struct ocfs2_extent_block *eb;
-	struct buffer_head *last_eb_bh = NULL;
-
-	mlog_entry_void();
-
-	*tc = NULL;
-
-	new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
-						  i_size_read(inode));
-	fe = (struct ocfs2_dinode *) fe_bh->b_data;
-
-	mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size ="
-	     "%llu\n", le32_to_cpu(fe->i_clusters), new_i_clusters,
-	     (unsigned long long)le64_to_cpu(fe->i_size));
-
-	*tc = kzalloc(sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
-	if (!(*tc)) {
-		status = -ENOMEM;
-		mlog_errno(status);
-		goto bail;
-	}
-	ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
-
-	if (fe->id2.i_list.l_tree_depth) {
-		status = ocfs2_read_extent_block(INODE_CACHE(inode),
-						 le64_to_cpu(fe->i_last_eb_blk),
-						 &last_eb_bh);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
-		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
-	}
-
-	(*tc)->tc_last_eb_bh = last_eb_bh;
-
-	status = 0;
-bail:
-	if (status < 0) {
-		if (*tc)
-			ocfs2_free_truncate_context(*tc);
-		*tc = NULL;
-	}
-	mlog_exit_void();
-	return status;
-}
-
 /*
  * 'start' is inclusive, 'end' is not.
  */
@@ -7273,18 +7214,3 @@ out_commit:
 out:
 	return ret;
 }
-
-static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
-{
-	/*
-	 * The caller is responsible for completing deallocation
-	 * before freeing the context.
-	 */
-	if (tc->tc_dealloc.c_first_suballocator != NULL)
-		mlog(ML_NOTICE,
-		     "Truncate completion has non-empty dealloc context\n");
-
-	brelse(tc->tc_last_eb_bh);
-
-	kfree(tc);
-}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 55762b554b99..3bd08a03251c 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -228,10 +228,6 @@ struct ocfs2_truncate_context {
 
 int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
 				  u64 range_start, u64 range_end);
-int ocfs2_prepare_truncate(struct ocfs2_super *osb,
-			   struct inode *inode,
-			   struct buffer_head *fe_bh,
-			   struct ocfs2_truncate_context **tc);
 int ocfs2_commit_truncate(struct ocfs2_super *osb,
 			  struct inode *inode,
 			  struct buffer_head *di_bh);
-- 
cgit v1.2.3