about summary refs log tree commit diff
diff options
context:
space:
mode:
author Stephen Rothwell <sfr@canb.auug.org.au> 2015-10-06 16:21:54 +1100
committer Stephen Rothwell <sfr@canb.auug.org.au> 2015-10-06 16:22:39 +1100
commit c3d233214fe13a197dbb29c1a0194e9bbfcc413b (patch)
tree e2f79b86033248936a2e1aaba3a29d9b28f0d48b
parent ad96469caef3a87baa102fce0b6bda21fef21b56 (diff)
Revert "ipv4: L3 hash-based multipath"
This reverts commit 0e884c78ee19e902f300ed147083c28a0c6302f0.
-rw-r--r-- include/net/ip_fib.h 14
-rw-r--r-- net/ipv4/fib_semantics.c 140
-rw-r--r-- net/ipv4/route.c 16
3 files changed, 72 insertions, 98 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 7a51fd8d99e4..727d6e9a9685 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -79,7 +79,7 @@ struct fib_nh {
unsigned char nh_scope;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int nh_weight;
- atomic_t nh_upper_bound;
+ int nh_power;
#endif
#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 nh_tclassid;
@@ -118,7 +118,7 @@ struct fib_info {
#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
int fib_nhs;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int fib_weight;
+ int fib_power;
#endif
struct rcu_head rcu;
struct fib_nh fib_nh[0];
@@ -320,15 +320,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
int fib_sync_down_dev(struct net_device *dev, unsigned long event);
int fib_sync_down_addr(struct net *net, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
-
-extern u32 fib_multipath_secret __read_mostly;
-
-static inline int fib_multipath_hash(__be32 saddr, __be32 daddr)
-{
- return jhash_2words(saddr, daddr, fib_multipath_secret) >> 1;
-}
-
-void fib_select_multipath(struct fib_result *res, int hash);
+void fib_select_multipath(struct fib_result *res);
/* Exported by fib_trie.c */
void fib_trie_init(void);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0c49d2f3bbc0..064bd3caaa4f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -57,7 +57,8 @@ static unsigned int fib_info_cnt;
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-u32 fib_multipath_secret __read_mostly;
+
+static DEFINE_SPINLOCK(fib_multipath_lock);
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
@@ -531,67 +532,7 @@ errout:
return ret;
}
-static void fib_rebalance(struct fib_info *fi)
-{
- int total;
- int w;
- struct in_device *in_dev;
-
- if (fi->fib_nhs < 2)
- return;
-
- total = 0;
- for_nexthops(fi) {
- if (nh->nh_flags & RTNH_F_DEAD)
- continue;
-
- in_dev = __in_dev_get_rcu(nh->nh_dev);
-
- if (in_dev &&
- IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
- nh->nh_flags & RTNH_F_LINKDOWN)
- continue;
-
- total += nh->nh_weight;
- } endfor_nexthops(fi);
-
- w = 0;
- change_nexthops(fi) {
- int upper_bound;
-
- in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
-
- if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
- upper_bound = -1;
- } else if (in_dev &&
- IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
- nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
- upper_bound = -1;
- } else {
- w += nexthop_nh->nh_weight;
- upper_bound = DIV_ROUND_CLOSEST(2147483648LL * w,
- total) - 1;
- }
-
- atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
- } endfor_nexthops(fi);
-
- net_get_random_once(&fib_multipath_secret,
- sizeof(fib_multipath_secret));
-}
-
-static inline void fib_add_weight(struct fib_info *fi,
- const struct fib_nh *nh)
-{
- fi->fib_weight += nh->nh_weight;
-}
-
-#else /* CONFIG_IP_ROUTE_MULTIPATH */
-
-#define fib_rebalance(fi) do { } while (0)
-#define fib_add_weight(fi, nh) do { } while (0)
-
-#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+#endif
static int fib_encap_match(struct net *net, u16 encap_type,
struct nlattr *encap,
@@ -1153,11 +1094,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
- fib_add_weight(fi, nexthop_nh);
} endfor_nexthops(fi)
- fib_rebalance(fi);
-
link_it:
ofi = fib_find_info(fi);
if (ofi) {
@@ -1379,6 +1317,12 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
break;
}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ spin_lock_bh(&fib_multipath_lock);
+ fi->fib_power -= nexthop_nh->nh_power;
+ nexthop_nh->nh_power = 0;
+ spin_unlock_bh(&fib_multipath_lock);
+#endif
dead++;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1401,8 +1345,6 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
}
ret++;
}
-
- fib_rebalance(fi);
}
return ret;
@@ -1525,15 +1467,20 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
!__in_dev_get_rtnl(dev))
continue;
alive++;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ spin_lock_bh(&fib_multipath_lock);
+ nexthop_nh->nh_power = 0;
+ nexthop_nh->nh_flags &= ~nh_flags;
+ spin_unlock_bh(&fib_multipath_lock);
+#else
nexthop_nh->nh_flags &= ~nh_flags;
+#endif
} endfor_nexthops(fi)
if (alive > 0) {
fi->fib_flags &= ~nh_flags;
ret++;
}
-
- fib_rebalance(fi);
}
return ret;
@@ -1541,19 +1488,62 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-void fib_select_multipath(struct fib_result *res, int hash)
+/*
+ * The algorithm is suboptimal, but it provides really
+ * fair weighted route distribution.
+ */
+void fib_select_multipath(struct fib_result *res)
{
struct fib_info *fi = res->fi;
+ struct in_device *in_dev;
+ int w;
- for_nexthops(fi) {
- if (hash > atomic_read(&nh->nh_upper_bound))
- continue;
+ spin_lock_bh(&fib_multipath_lock);
+ if (fi->fib_power <= 0) {
+ int power = 0;
+ change_nexthops(fi) {
+ in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
+ if (nexthop_nh->nh_flags & RTNH_F_DEAD)
+ continue;
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
+ continue;
+ power += nexthop_nh->nh_weight;
+ nexthop_nh->nh_power = nexthop_nh->nh_weight;
+ } endfor_nexthops(fi);
+ fi->fib_power = power;
+ if (power <= 0) {
+ spin_unlock_bh(&fib_multipath_lock);
+ /* Race condition: route has just become dead. */
+ res->nh_sel = 0;
+ return;
+ }
+ }
- res->nh_sel = nhsel;
- return;
+
+ /* w should be random number [0..fi->fib_power-1],
+ * it is pretty bad approximation.
+ */
+
+ w = jiffies % fi->fib_power;
+
+ change_nexthops(fi) {
+ if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
+ nexthop_nh->nh_power) {
+ w -= nexthop_nh->nh_power;
+ if (w <= 0) {
+ nexthop_nh->nh_power--;
+ fi->fib_power--;
+ res->nh_sel = nhsel;
+ spin_unlock_bh(&fib_multipath_lock);
+ return;
+ }
+ }
} endfor_nexthops(fi);
/* Race condition: route has just become dead. */
res->nh_sel = 0;
+ spin_unlock_bh(&fib_multipath_lock);
}
#endif
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0cca44476b1e..76ca4e75f785 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1658,12 +1658,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
__be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi && res->fi->fib_nhs > 1) {
- int h;
-
- h = fib_multipath_hash(saddr, daddr);
- fib_select_multipath(res, h);
- }
+ if (res->fi && res->fi->fib_nhs > 1)
+ fib_select_multipath(res);
#endif
/* create a routing cache entry */
@@ -2193,12 +2189,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
- int h;
-
- h = fib_multipath_hash(fl4->saddr, fl4->daddr);
- fib_select_multipath(&res, h);
- }
+ if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
+ fib_select_multipath(&res);
else
#endif
if (!res.prefixlen &&