From 8d1ca69984ed1e5930c0537b8f606c54007d7319 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 5 Dec 2005 13:32:14 -0800
Subject: [NETFILTER]: Fix incorrect argument to ip_nat_initialized() in
 ctnetlink

ip_nat_initialized() takes enum ip_nat_manip_type as its second argument,
not a hook number.

Noticed and initial patch by Marcus Sundberg <marcus@ingate.com>.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 3fce91bcc0b..70402e0ed00 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -877,7 +877,7 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
 		DEBUGP("NAT status: %lu\n", 
 		       status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
 		
-		if (ip_nat_initialized(ct, hooknum))
+		if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
 			return -EEXIST;
 		ip_nat_setup_info(ct, &range, hooknum);
 
-- 
cgit v1.2.3


From f16c910724250c1af0f53111b4c76505000819f6 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Mon, 5 Dec 2005 13:32:50 -0800
Subject: [NETFILTER]: nf_conntrack: Fix missing check for ICMPv6 type

This makes nf_conntrack_icmpv6 check that the ICMPv6 type isn't < 128,
to avoid out-of-bounds accesses to the arrays valid_new[] and invmap[].

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index c0f1da5497a..a7e03cfacd0 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -68,8 +68,8 @@ static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 		[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_REPLY +1
 	};
 
-	__u8 type = orig->dst.u.icmp.type - 128;
-	if (type >= sizeof(invmap) || !invmap[type])
+	int type = orig->dst.u.icmp.type - 128;
+	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
 		return 0;
 
 	tuple->src.u.icmp.id   = orig->src.u.icmp.id;
@@ -129,12 +129,12 @@ static int icmpv6_new(struct nf_conn *conntrack,
 		[ICMPV6_ECHO_REQUEST - 128] = 1,
 		[ICMPV6_NI_QUERY - 128] = 1
 	};
+	int type = conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128;
 
-	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new)
-	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) {
+	if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
 		/* Can't create a new ICMPv6 `conn' with this. */
-		DEBUGP("icmp: can't create new conn with type %u\n",
-		       conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+		DEBUGP("icmpv6: can't create new conn with type %u\n",
+		       type + 128);
 		NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
 		return 0;
 	}
-- 
cgit v1.2.3


From 3ebbe0cdd41f62e5c79cb4d7aa42a1ca50d7d2f2 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Mon, 5 Dec 2005 13:33:26 -0800
Subject: [NETFILTER]: nfnetlink: Fix calculation of minimum message length

A valid nfnetlink message must contain at least an nlmsghdr and an nfgenmsg.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a60c59b9763..95fdf04f1d8 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -162,7 +162,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
 		return -EINVAL;
 	}
 
-	min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg));
+	min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
 	if (unlikely(nlh->nlmsg_len < min_len))
 		return -EINVAL;
 
@@ -236,8 +236,7 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
 	}
 
 	/* All the messages must at least contain nfgenmsg */
-	if (nlh->nlmsg_len < 
-			NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) {
+	if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) {
 		DEBUGP("received message was too short\n");
 		return 0;
 	}
-- 
cgit v1.2.3


From afe5c6bb034bfa5824f8e7def6a739653e8f4655 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 5 Dec 2005 13:33:50 -0800
Subject: [NETFILTER]: Fix ip_conntrack_flush abuse in ctnetlink

ip_conntrack_flush() used to be part of ip_conntrack_cleanup(), which needs
to drop _all_ references on module unload. A table flush requested through
ctnetlink just needs to clean the table and doesn't need to flush the event
cache or wait for any references attached to skbs. Move everything but pure
table flushing back to ip_conntrack_cleanup().

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_core.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 7a4ecddd597..84c66dbfeda 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1345,6 +1345,11 @@ static int kill_all(struct ip_conntrack *i, void *data)
 	return 1;
 }
 
+void ip_conntrack_flush(void)
+{
+	ip_ct_iterate_cleanup(kill_all, NULL);
+}
+
 static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
 {
 	if (vmalloced)
@@ -1354,8 +1359,12 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
 			   get_order(sizeof(struct list_head) * size));
 }
 
-void ip_conntrack_flush(void)
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
 {
+	ip_ct_attach = NULL;
+
 	/* This makes sure all current packets have passed through
            netfilter framework.  Roll on, two-stage module
            delete... */
@@ -1363,7 +1372,7 @@ void ip_conntrack_flush(void)
 
 	ip_ct_event_cache_flush();
  i_see_dead_people:
-	ip_ct_iterate_cleanup(kill_all, NULL);
+	ip_conntrack_flush();
 	if (atomic_read(&ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
@@ -1371,14 +1380,7 @@ void ip_conntrack_flush(void)
 	/* wait until all references to ip_conntrack_untracked are dropped */
 	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
 		schedule();
-}
 
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
-	ip_ct_attach = NULL;
-	ip_conntrack_flush();
 	kmem_cache_destroy(ip_conntrack_cachep);
 	kmem_cache_destroy(ip_conntrack_expect_cachep);
 	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
-- 
cgit v1.2.3


From 0be7fa92ca162bf5e7993c392e6f93909d617bbb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 5 Dec 2005 13:34:51 -0800
Subject: [NETFILTER]: Fix CTA_PROTO_NUM attribute size in ctnetlink

CTA_PROTO_NUM is a u_int8_t.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 70402e0ed00..d058ac41bfd 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -503,7 +503,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
 }
 
 static const size_t cta_min_proto[CTA_PROTO_MAX] = {
-	[CTA_PROTO_NUM-1]	= sizeof(u_int16_t),
+	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
 	[CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
 	[CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t),
 	[CTA_PROTO_ICMP_TYPE-1]	= sizeof(u_int8_t),
@@ -528,7 +528,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
 
 	if (!tb[CTA_PROTO_NUM-1])
 		return -EINVAL;
-	tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
 
 	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
 
-- 
cgit v1.2.3


From a79575633300adb5d3f1bd856cc518c45fefcb86 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 5 Dec 2005 13:36:25 -0800
Subject: [NETFILTER]: Mark ctnetlink as EXPERIMENTAL

Should have been marked EXPERIMENTAL from the beginning, as the current
bunch of fixes shows.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig | 8 ++++----
 net/netfilter/Kconfig      | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0bc00528d88..88a60650e6b 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -56,8 +56,8 @@ config IP_NF_CONNTRACK_MARK
 	  instead of the individual packets.
 	
 config IP_NF_CONNTRACK_EVENTS
-	bool "Connection tracking events"
-	depends on IP_NF_CONNTRACK
+	bool "Connection tracking events (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && IP_NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
 	  provide a notifier chain that can be used by other kernel code
@@ -66,8 +66,8 @@ config IP_NF_CONNTRACK_EVENTS
 	  IF unsure, say `N'.
 
 config IP_NF_CONNTRACK_NETLINK
-	tristate 'Connection tracking netlink interface'
-	depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
+	depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
 	depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
 	help
 	  This option enables support for a netlink-based userspace interface
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a84f9221e5f..794c41d19b2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -61,8 +61,8 @@ config NF_CONNTRACK_MARK
 	  instead of the individual packets.
 
 config NF_CONNTRACK_EVENTS
-	bool "Connection tracking events"
-	depends on NF_CONNTRACK
+	bool "Connection tracking events (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
 	  provide a notifier chain that can be used by other kernel code
-- 
cgit v1.2.3


From 6636568cf85ef5898a892e90fcc88b61cca9ca27 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 5 Dec 2005 13:36:50 -0800
Subject: [NETFILTER]: Wait for untracked references in nf_conntrack module
 unload

Noticed by Pablo Neira <pablo@eurodev.net>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1da678303d7..a7c7b490cf2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1383,6 +1383,9 @@ void nf_conntrack_cleanup(void)
 		schedule();
 		goto i_see_dead_people;
 	}
+	/* wait until all references to nf_conntrack_untracked are dropped */
+	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+		schedule();
 
 	for (i = 0; i < NF_CT_F_NUM; i++) {
 		if (nf_ct_cache[i].use == 0)
-- 
cgit v1.2.3


From 266c8543480e2202ab63d1d604a5ca049f350cd8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 5 Dec 2005 13:37:33 -0800
Subject: [NETFILTER]: Fix unbalanced read_unlock_bh in ctnetlink

NFA_NEST calls NFA_PUT which jumps to nfattr_failure if the skb has no
room left. We call read_unlock_bh at nfattr_failure for the NFA_PUT inside
the locked section, so move NFA_NEST inside the locked section too.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index aeb7353d477..e7fa29e576d 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -341,9 +341,10 @@ static int tcp_print_conntrack(struct seq_file *s,
 static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
 			 const struct ip_conntrack *ct)
 {
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+	struct nfattr *nest_parms;
 	
 	read_lock_bh(&tcp_lock);
+	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
 	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
 		&ct->proto.tcp.state);
 	read_unlock_bh(&tcp_lock);
-- 
cgit v1.2.3


From 2fdf1faa8e33082d691bcba18814276f2bd5a6f0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 5 Dec 2005 13:38:16 -0800
Subject: [NETFILTER]: Don't use conntrack entry after dropping the reference

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index d058ac41bfd..91fe8f2e38f 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -728,11 +728,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			return -ENOENT;
 		}
 	}	
-	if (del_timer(&ct->timeout)) {
-		ip_conntrack_put(ct);
+	if (del_timer(&ct->timeout))
 		ct->timeout.function((unsigned long)ct);
-		return 0;
-	}
+
 	ip_conntrack_put(ct);
 	DEBUGP("leaving\n");
 
-- 
cgit v1.2.3


From dab9630fb3d206f49658066a3ecf80ea120364db Mon Sep 17 00:00:00 2001
From: Martin Waitz <tali@admingilde.org>
Date: Mon, 5 Dec 2005 13:40:12 -0800
Subject: [NET]: make function pointer argument parseable by kernel-doc

When a function takes a function pointer as an argument, it should use the
'return_type (*pointer)(params...)' syntax used everywhere else in the kernel,
as this is recognized by kernel-doc.

Signed-off-by: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b7d13a4fff4..83fee37de38 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1725,7 +1725,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
  * of the skb if any page alloc fails user this procedure returns  -ENOMEM
  */
 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			int getfrag(void *from, char *to, int offset,
+			int (*getfrag)(void *from, char *to, int offset,
 					int len, int odd, struct sk_buff *skb),
 			void *from, int length)
 {
-- 
cgit v1.2.3


From 1f12bcc9d1840fd26bf577065214f1ebeb2609ba Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <steve@chygwyn.com>
Date: Mon, 5 Dec 2005 13:42:06 -0800
Subject: [DECNET]: add memory buffer settings

The patch (originally from Steve) simply adds memory buffer settings to
DECnet similar to those in TCP.

Signed-off-by: Patrick Caulfield <patrick@tykepenguin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h         |  3 +++
 include/net/dn.h               |  4 ++++
 net/decnet/af_decnet.c         | 25 ++++++++++++++++++++++---
 net/decnet/sysctl_net_decnet.c | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 6bc03c911a8..4be34ef8c2f 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -670,6 +670,9 @@ enum {
 	NET_DECNET_DST_GC_INTERVAL = 9,
 	NET_DECNET_CONF = 10,
 	NET_DECNET_NO_FC_MAX_CWND = 11,
+	NET_DECNET_MEM = 12,
+	NET_DECNET_RMEM = 13,
+	NET_DECNET_WMEM = 14,
 	NET_DECNET_DEBUG_LEVEL = 255
 };
 
diff --git a/include/net/dn.h b/include/net/dn.h
index c1dbbd22279..a4b6168e1e2 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -234,4 +234,8 @@ extern int decnet_di_count;
 extern int decnet_dr_count;
 extern int decnet_no_fc_max_cwnd;
 
+extern int sysctl_decnet_mem[3];
+extern int sysctl_decnet_wmem[3];
+extern int sysctl_decnet_rmem[3];
+
 #endif /* _NET_DN_H */
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index f89e55f814d..d402e9020c6 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -153,6 +153,7 @@ static struct proto_ops dn_proto_ops;
 static DEFINE_RWLOCK(dn_hash_lock);
 static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
 static struct hlist_head dn_wild_sk;
+static atomic_t decnet_memory_allocated;
 
 static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen, int flags);
 static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -446,10 +447,26 @@ static void dn_destruct(struct sock *sk)
 	dst_release(xchg(&sk->sk_dst_cache, NULL));
 }
 
+static int dn_memory_pressure;
+
+static void dn_enter_memory_pressure(void)
+{
+	if (!dn_memory_pressure) {
+		dn_memory_pressure = 1;
+	}
+}
+
 static struct proto dn_proto = {
-	.name	  = "DECNET",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct dn_sock),
+	.name			= "NSP",
+	.owner			= THIS_MODULE,
+	.enter_memory_pressure	= dn_enter_memory_pressure,
+	.memory_pressure	= &dn_memory_pressure,
+	.memory_allocated	= &decnet_memory_allocated,
+	.sysctl_mem		= sysctl_decnet_mem,
+	.sysctl_wmem		= sysctl_decnet_wmem,
+	.sysctl_rmem		= sysctl_decnet_rmem,
+	.max_header		= DN_MAX_NSP_DATA_HEADER + 64,
+	.obj_size		= sizeof(struct dn_sock),
 };
 
 static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
@@ -470,6 +487,8 @@ static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
 	sk->sk_family      = PF_DECnet;
 	sk->sk_protocol    = 0;
 	sk->sk_allocation  = gfp;
+	sk->sk_sndbuf	   = sysctl_decnet_wmem[1];
+	sk->sk_rcvbuf	   = sysctl_decnet_rmem[1];
 
 	/* Initialization of DECnet Session Control Port		*/
 	scp = DN_SK(sk);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 02bca49cb50..0e9d2c57116 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -10,6 +10,7 @@
  *
  * Changes:
  * Steve Whitehouse - C99 changes and default device handling
+ * Steve Whitehouse - Memory buffer settings, like the tcp ones
  *
  */
 #include <linux/config.h>
@@ -37,6 +38,11 @@ int decnet_dr_count = 3;
 int decnet_log_martians = 1;
 int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
 
+/* Reasonable defaults, I hope, based on tcp's defaults */
+int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
+int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
+
 #ifdef CONFIG_SYSCTL
 extern int decnet_dst_gc_interval;
 static int min_decnet_time_wait[] = { 5 };
@@ -428,6 +434,33 @@ static ctl_table dn_table[] = {
 		.extra1 = &min_decnet_no_fc_max_cwnd,
 		.extra2 = &max_decnet_no_fc_max_cwnd
 	},
+       {
+                .ctl_name = NET_DECNET_MEM,
+                .procname = "decnet_mem",
+                .data = &sysctl_decnet_mem,
+                .maxlen = sizeof(sysctl_decnet_mem),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = NET_DECNET_RMEM,
+                .procname = "decnet_rmem",
+                .data = &sysctl_decnet_rmem,
+                .maxlen = sizeof(sysctl_decnet_rmem),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = NET_DECNET_WMEM,
+                .procname = "decnet_wmem",
+                .data = &sysctl_decnet_wmem,
+                .maxlen = sizeof(sysctl_decnet_wmem),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 	{
 		.ctl_name = NET_DECNET_DEBUG_LEVEL,
 		.procname = "debug",
-- 
cgit v1.2.3


From 5b4956138173cb8b58d83d3173360e8e681a2b66 Mon Sep 17 00:00:00 2001
From: Thomas Young <tyo@ee.mu.oz.au>
Date: Tue, 6 Dec 2005 16:16:34 -0800
Subject: [TCP] Vegas: stop resetting rtt every ack

Move the resetting of rtt measurements to inside the once per RTT
block of code.

Signed-off-by: Thomas Young <tyo@ee.mu.oz.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_vegas.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index b7d296a8ac6..8f06a479305 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -333,11 +333,11 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 			else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
 				tp->snd_cwnd = tp->snd_cwnd_clamp;
 		}
-	}
 
-	/* Wipe the slate clean for the next RTT. */
-	vegas->cntRTT = 0;
-	vegas->minRTT = 0x7fffffff;
+		/* Wipe the slate clean for the next RTT. */
+		vegas->cntRTT = 0;
+		vegas->minRTT = 0x7fffffff;
+	}
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-- 
cgit v1.2.3


From 0d7bef600acab393898bd5553e167496587da3e1 Mon Sep 17 00:00:00 2001
From: Thomas Young <tyo@ee.mu.oz.au>
Date: Tue, 6 Dec 2005 16:17:11 -0800
Subject: [TCP] Vegas: Remove extra call to tcp_vegas_rtt_calc

Remove unneeded call to tcp_vegas_rtt_calc. The more accurate
microsecond value has already been registered prior to calling
tcp_vegas_cong_avoid.

Signed-off-by: Thomas Young <tyo@ee.mu.oz.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_vegas.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 8f06a479305..13e7e6e8df1 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -215,14 +215,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 		vegas->beg_snd_nxt  = tp->snd_nxt;
 		vegas->beg_snd_cwnd = tp->snd_cwnd;
 
-		/* Take into account the current RTT sample too, to
-		 * decrease the impact of delayed acks. This double counts
-		 * this sample since we count it for the next window as well,
-		 * but that's not too awful, since we're taking the min,
-		 * rather than averaging.
-		 */
-		tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
-
 		/* We do the Vegas calculations only if we got enough RTT
 		 * samples that we can be reasonably sure that we got
 		 * at least one RTT sample that wasn't from a delayed ACK.
-- 
cgit v1.2.3


From dfb4b9dceb35c567a595ae5e9d035cfda044a103 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 6 Dec 2005 16:24:52 -0800
Subject: [TCP] Vegas: timestamp before clone

We have to store the congestion control timestamp on the SKB before we
clone it, not after.  Else we get no timestamping information at all.

tcp_transmit_skb() has been reworked so that we can do the timestamp
still in one spot, instead of at all the call sites.

Problem discovered, and initial fix, from Tom Young
<tyo@ee.unimelb.edu.au>.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 233 +++++++++++++++++++++++++++-----------------------
 1 file changed, 124 insertions(+), 109 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 029c70dfb58..b7325e0b406 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -262,122 +262,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
 {
-	if (skb != NULL) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
-		struct inet_sock *inet = inet_sk(sk);
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-		int tcp_header_size = tp->tcp_header_len;
-		struct tcphdr *th;
-		int sysctl_flags;
-		int err;
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_sock *inet;
+	struct tcp_sock *tp;
+	struct tcp_skb_cb *tcb;
+	int tcp_header_size;
+	struct tcphdr *th;
+	int sysctl_flags;
+	int err;
+
+	BUG_ON(!skb || !tcp_skb_pcount(skb));
+
+	/* If congestion control is doing timestamping, we must
+	 * take such a timestamp before we potentially clone/copy.
+	 */
+	if (icsk->icsk_ca_ops->rtt_sample)
+		__net_timestamp(skb);
+
+	if (likely(clone_it)) {
+		if (unlikely(skb_cloned(skb)))
+			skb = pskb_copy(skb, gfp_mask);
+		else
+			skb = skb_clone(skb, gfp_mask);
+		if (unlikely(!skb))
+			return -ENOBUFS;
+	}
 
-		BUG_ON(!tcp_skb_pcount(skb));
+	inet = inet_sk(sk);
+	tp = tcp_sk(sk);
+	tcb = TCP_SKB_CB(skb);
+	tcp_header_size = tp->tcp_header_len;
 
 #define SYSCTL_FLAG_TSTAMPS	0x1
 #define SYSCTL_FLAG_WSCALE	0x2
 #define SYSCTL_FLAG_SACK	0x4
 
-		/* If congestion control is doing timestamping */
-		if (icsk->icsk_ca_ops->rtt_sample)
-			__net_timestamp(skb);
-
-		sysctl_flags = 0;
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-			if(sysctl_tcp_timestamps) {
-				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
-			}
-			if(sysctl_tcp_window_scaling) {
-				tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_WSCALE;
-			}
-			if(sysctl_tcp_sack) {
-				sysctl_flags |= SYSCTL_FLAG_SACK;
-				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
-					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
-			}
-		} else if (tp->rx_opt.eff_sacks) {
-			/* A SACK is 2 pad bytes, a 2 byte header, plus
-			 * 2 32-bit sequence numbers for each SACK block.
-			 */
-			tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-					    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+	sysctl_flags = 0;
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+		if(sysctl_tcp_timestamps) {
+			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
 		}
-		
-		if (tcp_packets_in_flight(tp) == 0)
-			tcp_ca_event(sk, CA_EVENT_TX_START);
-
-		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-		skb->h.th = th;
-		skb_set_owner_w(skb, sk);
-
-		/* Build TCP header and checksum it. */
-		th->source		= inet->sport;
-		th->dest		= inet->dport;
-		th->seq			= htonl(tcb->seq);
-		th->ack_seq		= htonl(tp->rcv_nxt);
-		*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) | tcb->flags);
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			/* RFC1323: The window in SYN & SYN/ACK segments
-			 * is never scaled.
-			 */
-			th->window	= htons(tp->rcv_wnd);
-		} else {
-			th->window	= htons(tcp_select_window(sk));
+		if (sysctl_tcp_window_scaling) {
+			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_WSCALE;
 		}
-		th->check		= 0;
-		th->urg_ptr		= 0;
-
-		if (tp->urg_mode &&
-		    between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
-			th->urg_ptr		= htons(tp->snd_up-tcb->seq);
-			th->urg			= 1;
+		if (sysctl_tcp_sack) {
+			sysctl_flags |= SYSCTL_FLAG_SACK;
+			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
 		}
+	} else if (unlikely(tp->rx_opt.eff_sacks)) {
+		/* A SACK is 2 pad bytes, a 2 byte header, plus
+		 * 2 32-bit sequence numbers for each SACK block.
+		 */
+		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+				    (tp->rx_opt.eff_sacks *
+				     TCPOLEN_SACK_PERBLOCK));
+	}
+		
+	if (tcp_packets_in_flight(tp) == 0)
+		tcp_ca_event(sk, CA_EVENT_TX_START);
+
+	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+	skb->h.th = th;
+	skb_set_owner_w(skb, sk);
+
+	/* Build TCP header and checksum it. */
+	th->source		= inet->sport;
+	th->dest		= inet->dport;
+	th->seq			= htonl(tcb->seq);
+	th->ack_seq		= htonl(tp->rcv_nxt);
+	*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
+					tcb->flags);
+
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		/* RFC1323: The window in SYN & SYN/ACK segments
+		 * is never scaled.
+		 */
+		th->window	= htons(tp->rcv_wnd);
+	} else {
+		th->window	= htons(tcp_select_window(sk));
+	}
+	th->check		= 0;
+	th->urg_ptr		= 0;
 
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_syn_build_options((__u32 *)(th + 1),
-					      tcp_advertise_mss(sk),
-					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
-					      (sysctl_flags & SYSCTL_FLAG_SACK),
-					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
-					      tp->rx_opt.rcv_wscale,
-					      tcb->when,
-		      			      tp->rx_opt.ts_recent);
-		} else {
-			tcp_build_and_update_options((__u32 *)(th + 1),
-						     tp, tcb->when);
+	if (unlikely(tp->urg_mode &&
+		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
+		th->urg_ptr		= htons(tp->snd_up-tcb->seq);
+		th->urg			= 1;
+	}
 
-			TCP_ECN_send(sk, tp, skb, tcp_header_size);
-		}
-		tp->af_specific->send_check(sk, th, skb->len, skb);
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_syn_build_options((__u32 *)(th + 1),
+				      tcp_advertise_mss(sk),
+				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+				      (sysctl_flags & SYSCTL_FLAG_SACK),
+				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
+				      tp->rx_opt.rcv_wscale,
+				      tcb->when,
+				      tp->rx_opt.ts_recent);
+	} else {
+		tcp_build_and_update_options((__u32 *)(th + 1),
+					     tp, tcb->when);
+		TCP_ECN_send(sk, tp, skb, tcp_header_size);
+	}
 
-		if (tcb->flags & TCPCB_FLAG_ACK)
-			tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+	tp->af_specific->send_check(sk, th, skb->len, skb);
 
-		if (skb->len != tcp_header_size)
-			tcp_event_data_sent(tp, skb, sk);
+	if (likely(tcb->flags & TCPCB_FLAG_ACK))
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
-		TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	if (skb->len != tcp_header_size)
+		tcp_event_data_sent(tp, skb, sk);
 
-		err = tp->af_specific->queue_xmit(skb, 0);
-		if (err <= 0)
-			return err;
+	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
-		tcp_enter_cwr(sk);
+	err = tp->af_specific->queue_xmit(skb, 0);
+	if (unlikely(err <= 0))
+		return err;
+
+	tcp_enter_cwr(sk);
+
+	/* NET_XMIT_CN is special. It does not guarantee,
+	 * that this packet is lost. It tells that device
+	 * is about to start to drop packets or already
+	 * drops some packets of the same priority and
+	 * invokes us to send less aggressively.
+	 */
+	return err == NET_XMIT_CN ? 0 : err;
 
-		/* NET_XMIT_CN is special. It does not guarantee,
-		 * that this packet is lost. It tells that device
-		 * is about to start to drop packets or already
-		 * drops some packets of the same priority and
-		 * invokes us to send less aggressively.
-		 */
-		return err == NET_XMIT_CN ? 0 : err;
-	}
-	return -ENOBUFS;
 #undef SYSCTL_FLAG_TSTAMPS
 #undef SYSCTL_FLAG_WSCALE
 #undef SYSCTL_FLAG_SACK
@@ -1036,7 +1053,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+		if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
 			break;
 
 		/* Advance the send_head.  This one is sent out.
@@ -1109,7 +1126,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		/* Send it out now. */
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
 			update_send_head(sk, tp, skb);
 			tcp_cwnd_validate(sk, tp);
 			return;
@@ -1429,9 +1446,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
-				    pskb_copy(skb, GFP_ATOMIC):
-				    skb_clone(skb, GFP_ATOMIC)));
+	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
@@ -1665,7 +1680,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	if (tcp_transmit_skb(sk, skb))
+	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 }
 
@@ -1700,7 +1715,7 @@ int tcp_send_synack(struct sock *sk)
 		TCP_ECN_send_synack(tcp_sk(sk), skb);
 	}
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 }
 
 /*
@@ -1861,7 +1876,7 @@ int tcp_connect(struct sock *sk)
 	__skb_queue_tail(&sk->sk_write_queue, buff);
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
+	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
@@ -1957,7 +1972,7 @@ void tcp_send_ack(struct sock *sk)
 		/* Send it off, this clears delayed acks for us. */
 		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
 		TCP_SKB_CB(buff)->when = tcp_time_stamp;
-		tcp_transmit_skb(sk, buff);
+		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 	}
 }
 
@@ -1997,7 +2012,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb);
+	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
 int tcp_write_wakeup(struct sock *sk)
@@ -2030,7 +2045,7 @@ int tcp_write_wakeup(struct sock *sk)
 
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 			if (!err) {
 				update_send_head(sk, tp, skb);
 			}
-- 
cgit v1.2.3


From 4ebf0ae2618fbbb0d365e5d295a30ccfcb91fe0b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 6 Dec 2005 16:38:35 -0800
Subject: [AF_PACKET]: Convert PACKET_MMAP over to vm_insert_page().

So we can properly use __GFP_COMP and avoid the use of
PG_reserved pages.

With extremely helpful review from Hugh Dickins.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 115 ++++++++++++++++++++++++++-----------------------
 1 file changed, 61 insertions(+), 54 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 499ae3df4a4..3e246276041 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1587,23 +1587,47 @@ static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
 	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
 }
 
-static void free_pg_vec(char **pg_vec, unsigned order, unsigned len)
+static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
 {
 	int i;
 
-	for (i=0; i<len; i++) {
-		if (pg_vec[i]) {
-			struct page *page, *pend;
-
-			pend = pg_vec_endpage(pg_vec[i], order);
-			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
-				ClearPageReserved(page);
-			free_pages((unsigned long)pg_vec[i], order);
-		}
+	for (i = 0; i < len; i++) {
+		if (likely(pg_vec[i]))
+			free_pages((unsigned long) pg_vec[i], order);
 	}
 	kfree(pg_vec);
 }
 
+static inline char *alloc_one_pg_vec_page(unsigned long order)
+{
+	return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
+					 order);
+}
+
+static char **alloc_pg_vec(struct tpacket_req *req, int order)
+{
+	unsigned int block_nr = req->tp_block_nr;
+	char **pg_vec;
+	int i;
+
+	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
+	if (unlikely(!pg_vec))
+		goto out;
+
+	for (i = 0; i < block_nr; i++) {
+		pg_vec[i] = alloc_one_pg_vec_page(order);
+		if (unlikely(!pg_vec[i]))
+			goto out_free_pgvec;
+	}
+
+out:
+	return pg_vec;
+
+out_free_pgvec:
+	free_pg_vec(pg_vec, order, block_nr);
+	pg_vec = NULL;
+	goto out;
+}
 
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
 {
@@ -1617,64 +1641,46 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 
 		/* Sanity tests and some calculations */
 
-		if (po->pg_vec)
+		if (unlikely(po->pg_vec))
 			return -EBUSY;
 
-		if ((int)req->tp_block_size <= 0)
+		if (unlikely((int)req->tp_block_size <= 0))
 			return -EINVAL;
-		if (req->tp_block_size&(PAGE_SIZE-1))
+		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
 			return -EINVAL;
-		if (req->tp_frame_size < TPACKET_HDRLEN)
+		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
 			return -EINVAL;
-		if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
+		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			return -EINVAL;
 
 		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
-		if (po->frames_per_block <= 0)
+		if (unlikely(po->frames_per_block <= 0))
 			return -EINVAL;
-		if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
+		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
+			     req->tp_frame_nr))
 			return -EINVAL;
-		/* OK! */
-
-		/* Allocate page vector */
-		while ((PAGE_SIZE<<order) < req->tp_block_size)
-			order++;
 
 		err = -ENOMEM;
-
-		pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL);
-		if (pg_vec == NULL)
+		order = get_order(req->tp_block_size);
+		pg_vec = alloc_pg_vec(req, order);
+		if (unlikely(!pg_vec))
 			goto out;
-		memset(pg_vec, 0, req->tp_block_nr*sizeof(char **));
-
-		for (i=0; i<req->tp_block_nr; i++) {
-			struct page *page, *pend;
-			pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
-			if (!pg_vec[i])
-				goto out_free_pgvec;
-
-			pend = pg_vec_endpage(pg_vec[i], order);
-			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
-				SetPageReserved(page);
-		}
-		/* Page vector is allocated */
 
 		l = 0;
-		for (i=0; i<req->tp_block_nr; i++) {
+		for (i = 0; i < req->tp_block_nr; i++) {
 			char *ptr = pg_vec[i];
 			struct tpacket_hdr *header;
 			int k;
 
-			for (k=0; k<po->frames_per_block; k++) {
-				
-				header = (struct tpacket_hdr*)ptr;
+			for (k = 0; k < po->frames_per_block; k++) {
+				header = (struct tpacket_hdr *) ptr;
 				header->tp_status = TP_STATUS_KERNEL;
 				ptr += req->tp_frame_size;
 			}
 		}
 		/* Done */
 	} else {
-		if (req->tp_frame_nr)
+		if (unlikely(req->tp_frame_nr))
 			return -EINVAL;
 	}
 
@@ -1701,7 +1707,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 
 		spin_lock_bh(&sk->sk_receive_queue.lock);
 		pg_vec = XC(po->pg_vec, pg_vec);
-		po->frame_max = req->tp_frame_nr-1;
+		po->frame_max = (req->tp_frame_nr - 1);
 		po->head = 0;
 		po->frame_size = req->tp_frame_size;
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -1728,7 +1734,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 
 	release_sock(sk);
 
-out_free_pgvec:
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->tp_block_nr);
 out:
@@ -1755,17 +1760,19 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
 	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
 		goto out;
 
-	atomic_inc(&po->mapped);
 	start = vma->vm_start;
-	err = -EAGAIN;
-	for (i=0; i<po->pg_vec_len; i++) {
-		if (remap_pfn_range(vma, start,
-				     __pa(po->pg_vec[i]) >> PAGE_SHIFT,
-				     po->pg_vec_pages*PAGE_SIZE,
-				     vma->vm_page_prot))
-			goto out;
-		start += po->pg_vec_pages*PAGE_SIZE;
+	for (i = 0; i < po->pg_vec_len; i++) {
+		struct page *page = virt_to_page(po->pg_vec[i]);
+		int pg_num;
+
+		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
+			err = vm_insert_page(vma, start, page);
+			if (unlikely(err))
+				goto out;
+			start += PAGE_SIZE;
+		}
 	}
+	atomic_inc(&po->mapped);
 	vma->vm_ops = &packet_mmap_ops;
 	err = 0;
 
-- 
cgit v1.2.3


From 1f064a87c80c86bd3878e6633de61e8b71aa6788 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 6 Dec 2005 17:36:44 -0800
Subject: [TG3]: remove warning on race

[ Move associated code comment to the correct spot, and
  update driver version and release date -DaveM ]

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 1828a6bf845..47bd4a39442 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -68,8 +68,8 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.43"
-#define DRV_MODULE_RELDATE	"Oct 24, 2005"
+#define DRV_MODULE_VERSION	"3.44"
+#define DRV_MODULE_RELDATE	"Dec 6, 2005"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
@@ -3565,12 +3565,15 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!spin_trylock(&tp->tx_lock))
 		return NETDEV_TX_LOCKED; 
 
-	/* This is a hard error, log it. */
 	if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
-		netif_stop_queue(dev);
+		if (!netif_queue_stopped(dev)) {
+			netif_stop_queue(dev);
+
+			/* This is a hard error, log it. */
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
+			       "queue awake!\n", dev->name);
+		}
 		spin_unlock(&tp->tx_lock);
-		printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-		       dev->name);
 		return NETDEV_TX_BUSY;
 	}
 
-- 
cgit v1.2.3


From 246a421207007a034da9b8cfa578bc00d16a9553 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 8 Dec 2005 15:21:39 -0800
Subject: [NET]: Fix NULL pointer deref in checksum debugging.

The problem I was seeing turned out to be that skb->dev is NULL when
the checksum is being completed in user context. This happens because
the reference to the device is dropped (to allow it to be released
when packets are in the queue).

Because skb->dev was NULL, the netdev_rx_csum_fault was panicking on
deref of dev->name. How about this?

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 0b48e294aaf..a5efc9ae010 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1113,7 +1113,8 @@ out:
 void netdev_rx_csum_fault(struct net_device *dev)
 {
 	if (net_ratelimit()) {
-		printk(KERN_ERR "%s: hw csum failure.\n", dev->name);
+		printk(KERN_ERR "%s: hw csum failure.\n", 
+			dev ? dev->name : "<unknown>");
 		dump_stack();
 	}
 }
-- 
cgit v1.2.3


From 73d4f84fd001b0be67fea46e84b75e6a7a5da08e Mon Sep 17 00:00:00 2001
From: Kazunori MIYAZAWA <miyazawa@linux-ipv6.org>
Date: Thu, 8 Dec 2005 23:11:42 -0800
Subject: [IPv6] IPsec: fix pmtu calculation of esp

It is a simple bug which uses the wrong member.

This bug does not seriously affect ordinary use of IPsec,
but fixing it is necessary to pass the IPv6 Ready Logo Phase-2
conformance test for an IPsec SGW.

Signed-off-by: Kazunori MIYAZAWA <miyazawa@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/esp6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40d9a1935ab..8bfbe997079 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -248,7 +248,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
 	if (esp->conf.padlen)
 		mtu = ALIGN(mtu, esp->conf.padlen);
 
-	return mtu + x->props.header_len + esp->auth.icv_full_len;
+	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-- 
cgit v1.2.3