aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c164
1 files changed, 101 insertions, 63 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 655d9eebe43e..f1f2e5aaa2d6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -114,6 +114,8 @@
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
+#include "fib_lookup.h"
+
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
@@ -1385,8 +1387,12 @@ static void rt_add_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
+ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *) dst;
+ if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ kfree(p);
+
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;
@@ -1438,7 +1444,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+ dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+ if (fi->fib_metrics != &dst_default_metrics) {
+ rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+ atomic_inc(&fi->fib_metrics->refcnt);
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@@ -1852,9 +1862,9 @@ static int ip_mkroute_input(struct sk_buff *skb,
*/
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+ u8 tos, struct net_device *dev,
+ struct fib_result *res)
{
- struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct ip_tunnel_info *tun_info;
struct flowi4 fl4;
@@ -1884,8 +1894,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
- res.fi = NULL;
- res.table = NULL;
+ res->fi = NULL;
+ res->table = NULL;
if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
goto brd_input;
@@ -1921,17 +1931,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- err = fib_lookup(net, &fl4, &res, 0);
+ err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
err = -EHOSTUNREACH;
goto no_route;
}
- if (res.type == RTN_BROADCAST)
+ if (res->type == RTN_BROADCAST)
goto brd_input;
- if (res.type == RTN_LOCAL) {
+ if (res->type == RTN_LOCAL) {
err = fib_validate_source(skb, saddr, daddr, tos,
0, dev, in_dev, &itag);
if (err < 0)
@@ -1943,10 +1953,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = -EHOSTUNREACH;
goto no_route;
}
- if (res.type != RTN_UNICAST)
+ if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
out: return err;
brd_input:
@@ -1960,14 +1970,14 @@ brd_input:
goto martian_source;
}
flags |= RTCF_BROADCAST;
- res.type = RTN_BROADCAST;
+ res->type = RTN_BROADCAST;
RT_CACHE_STAT_INC(in_brd);
local_input:
do_cache = false;
- if (res.fi) {
+ if (res->fi) {
if (!itag) {
- rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
err = 0;
@@ -1978,7 +1988,7 @@ local_input:
}
rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
- flags | RTCF_LOCAL, res.type,
+ flags | RTCF_LOCAL, res->type,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
if (!rth)
goto e_nobufs;
@@ -1988,18 +1998,18 @@ local_input:
rth->dst.tclassid = itag;
#endif
rth->rt_is_input = 1;
- if (res.table)
- rth->rt_table_id = res.table->tb_id;
+ if (res->table)
+ rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
- if (res.type == RTN_UNREACHABLE) {
+ if (res->type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
if (do_cache) {
- struct fib_nh *nh = &FIB_RES_NH(res);
+ struct fib_nh *nh = &FIB_RES_NH(*res);
rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@@ -2019,9 +2029,9 @@ local_input:
no_route:
RT_CACHE_STAT_INC(in_no_route);
- res.type = RTN_UNREACHABLE;
- res.fi = NULL;
- res.table = NULL;
+ res->type = RTN_UNREACHABLE;
+ res->fi = NULL;
+ res->table = NULL;
goto local_input;
/*
@@ -2051,11 +2061,22 @@ martian_source:
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev)
{
- int res;
+ struct fib_result res;
+ int err;
tos &= IPTOS_RT_MASK;
rcu_read_lock();
+ err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(ip_route_input_noref);
+
+/* called with rcu_read_lock held */
+int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev, struct fib_result *res)
+{
/* Multicast recognition logic is moved from route cache to here.
The problem was that too many Ethernet cards have broken/missing
hardware multicast filters :-( As result the host on multicasting
@@ -2070,6 +2091,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_multicast(daddr)) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
int our = 0;
+ int err = -EINVAL;
if (in_dev)
our = ip_check_mc_rcu(in_dev, daddr, saddr,
@@ -2085,7 +2107,6 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
ip_hdr(skb)->protocol);
}
- res = -EINVAL;
if (our
#ifdef CONFIG_IP_MROUTE
||
@@ -2093,17 +2114,14 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
IN_DEV_MFORWARD(in_dev))
#endif
) {
- res = ip_route_input_mc(skb, daddr, saddr,
+ err = ip_route_input_mc(skb, daddr, saddr,
tos, dev, our);
}
- rcu_read_unlock();
- return res;
+ return err;
}
- res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
- rcu_read_unlock();
- return res;
+
+ return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
}
-EXPORT_SYMBOL(ip_route_input_noref);
/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
@@ -2246,29 +2264,40 @@ add:
* Major route resolver routine.
*/
-struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
- const struct sk_buff *skb)
+struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
+ const struct sk_buff *skb)
{
- struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
- unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
- int orig_oif;
- int err = -ENETUNREACH;
res.tclassid = 0;
res.fi = NULL;
res.table = NULL;
- orig_oif = fl4->flowi4_oif;
-
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
rcu_read_lock();
+ rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
+ rcu_read_unlock();
+
+ return rth;
+}
+EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
+
+struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
+ struct fib_result *res,
+ const struct sk_buff *skb)
+{
+ struct net_device *dev_out = NULL;
+ int orig_oif = fl4->flowi4_oif;
+ unsigned int flags = 0;
+ struct rtable *rth;
+ int err = -ENETUNREACH;
+
if (fl4->saddr) {
rth = ERR_PTR(-EINVAL);
if (ipv4_is_multicast(fl4->saddr) ||
@@ -2354,15 +2383,15 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
dev_out = net->loopback_dev;
fl4->flowi4_oif = LOOPBACK_IFINDEX;
- res.type = RTN_LOCAL;
+ res->type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
}
- err = fib_lookup(net, fl4, &res, 0);
+ err = fib_lookup(net, fl4, res, 0);
if (err) {
- res.fi = NULL;
- res.table = NULL;
+ res->fi = NULL;
+ res->table = NULL;
if (fl4->flowi4_oif &&
(ipv4_is_multicast(fl4->daddr) ||
!netif_index_is_l3_master(net, fl4->flowi4_oif))) {
@@ -2387,43 +2416,41 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
if (fl4->saddr == 0)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_LINK);
- res.type = RTN_UNICAST;
+ res->type = RTN_UNICAST;
goto make_route;
}
rth = ERR_PTR(err);
goto out;
}
- if (res.type == RTN_LOCAL) {
+ if (res->type == RTN_LOCAL) {
if (!fl4->saddr) {
- if (res.fi->fib_prefsrc)
- fl4->saddr = res.fi->fib_prefsrc;
+ if (res->fi->fib_prefsrc)
+ fl4->saddr = res->fi->fib_prefsrc;
else
fl4->saddr = fl4->daddr;
}
/* L3 master device is the loopback for that domain */
- dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? :
+ dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
flags |= RTCF_LOCAL;
goto make_route;
}
- fib_select_path(net, &res, fl4, skb);
+ fib_select_path(net, res, fl4, skb);
- dev_out = FIB_RES_DEV(res);
+ dev_out = FIB_RES_DEV(*res);
fl4->flowi4_oif = dev_out->ifindex;
make_route:
- rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
+ rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
out:
- rcu_read_unlock();
return rth;
}
-EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
@@ -2517,18 +2544,18 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
+/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
- u32 seq, int event)
+ u32 seq, struct rtable *rt)
{
- struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
unsigned long expires = 0;
u32 error;
u32 metrics[RTAX_MAX];
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0);
+ nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
if (!nlh)
return -EMSGSIZE;
@@ -2636,6 +2663,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct net *net = sock_net(in_skb->sk);
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
+ struct fib_result res = {};
struct rtable *rt = NULL;
struct flowi4 fl4;
__be32 dst = 0;
@@ -2692,10 +2720,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
+ rcu_read_lock();
+
if (iif) {
struct net_device *dev;
- dev = __dev_get_by_index(net, iif);
+ dev = dev_get_by_index_rcu(net, iif);
if (!dev) {
err = -ENODEV;
goto errout_free;
@@ -2704,14 +2734,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->protocol = htons(ETH_P_IP);
skb->dev = dev;
skb->mark = mark;
- err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
+ err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
+ dev, &res);
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
err = -rt->dst.error;
} else {
- rt = ip_route_output_key(net, &fl4);
-
+ rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
err = 0;
if (IS_ERR(rt))
err = PTR_ERR(rt);
@@ -2720,24 +2750,32 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err)
goto errout_free;
- skb_dst_set(skb, &rt->dst);
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
table_id = rt->rt_table_id;
- err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- RTM_NEWROUTE);
+ if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+ err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
+ rt->rt_type, res.prefix, res.prefixlen,
+ fl4.flowi4_tos, res.fi, 0);
+ else
+ err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
+ NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
+ rt);
if (err < 0)
goto errout_free;
+ rcu_read_unlock();
+
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
return err;
errout_free:
+ rcu_read_unlock();
kfree_skb(skb);
goto errout;
}