From 2fccbd9cc0fdca649b01f1e2d96e5ef85256341a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 24 Sep 2012 15:53:29 -0400 Subject: sunrpc: server back channel needs no rpcbind method XPRT_BOUND is set on server backchannel xprts by xs_setup_bc_tcp() (using xprt_set_bound()), and is never cleared, so ->rpcbind() will never need to be called. Reported-by: "Myklebust, Trond" Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtsock.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ffd50348a50..5d6b0daf743 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2534,7 +2534,6 @@ static struct rpc_xprt_ops bc_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, .alloc_slot = xprt_alloc_slot, - .rpcbind = xs_local_rpcbind, .buf_alloc = bc_malloc, .buf_free = bc_free, .send_request = bc_send_request, -- cgit v1.2.3 From 119363c7dc2bcc0c33c255a7b4979c8c0fdc1896 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Apr 2013 16:29:30 +0200 Subject: cfg80211: add support for per-chain signal strength reporting Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index afa283841e8..f687a8d0d02 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3376,6 +3376,32 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, return true; } +static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, + int id) +{ + void *attr; + int i = 0; + + if (!mask) + return true; + + attr = nla_nest_start(msg, id); + if (!attr) + return false; + + for (i = 0; i < IEEE80211_MAX_CHAINS; i++) { + if (!(mask & BIT(i))) + continue; + + if (nla_put_u8(msg, i, signal[i])) + return false; + } + + nla_nest_end(msg, attr); + + return true; +} + 
static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -3447,6 +3473,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, default: break; } + if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) { + if (!nl80211_put_signal(msg, sinfo->chains, + sinfo->chain_signal, + NL80211_STA_INFO_CHAIN_SIGNAL)) + goto nla_put_failure; + } + if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) { + if (!nl80211_put_signal(msg, sinfo->chains, + sinfo->chain_signal_avg, + NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) + goto nla_put_failure; + } if (sinfo->filled & STATION_INFO_TX_BITRATE) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, NL80211_STA_INFO_TX_BITRATE)) -- cgit v1.2.3 From ef0621e805f9ef76eaf31ce6205028fe467e9ca9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Apr 2013 16:29:31 +0200 Subject: mac80211: add support for per-chain signal strength reporting Signed-off-by: Felix Fietkau [fix unit documentation] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 13 ++++++++++++- net/mac80211/rx.c | 14 ++++++++++++++ net/mac80211/sta_info.c | 2 ++ net/mac80211/sta_info.h | 5 +++++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1a89c80e640..1f51bdfe574 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -444,7 +444,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct timespec uptime; u64 packets = 0; - int ac; + int i, ac; sinfo->generation = sdata->local->sta_generation; @@ -488,6 +488,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->signal = (s8)sta->last_signal; sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } + if (sta->chains) { + sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | + STATION_INFO_CHAIN_SIGNAL_AVG; + + sinfo->chains = sta->chains; + for (i = 0; i < 
ARRAY_SIZE(sinfo->chain_signal); i++) { + sinfo->chain_signal[i] = sta->chain_signal_last[i]; + sinfo->chain_signal_avg[i] = + (s8) -ewma_read(&sta->chain_signal_avg[i]); + } + } sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); sta_set_rate_info_rx(sta, &sinfo->rxrate); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c8447af76ea..22e412b0767 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1372,6 +1372,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + int i; if (!sta) return RX_CONTINUE; @@ -1422,6 +1423,19 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) ewma_add(&sta->avg_signal, -status->signal); } + if (status->chains) { + sta->chains = status->chains; + for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { + int signal = status->chain_signal[i]; + + if (!(status->chains & BIT(i))) + continue; + + sta->chain_signal_last[i] = signal; + ewma_add(&sta->chain_signal_avg[i], -signal); + } + } + /* * Change STA power saving mode only at the end of a frame * exchange sequence. 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 11216bc13b2..a04c5671d7f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -358,6 +358,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, do_posix_clock_monotonic_gettime(&uptime); sta->last_connected = uptime.tv_sec; ewma_init(&sta->avg_signal, 1024, 8); + for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) + ewma_init(&sta->chain_signal_avg[i], 1024, 8); if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index adc30045f99..41c28b977f7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -344,6 +344,11 @@ struct sta_info { int last_signal; struct ewma avg_signal; int last_ack_signal; + + u8 chains; + s8 chain_signal_last[IEEE80211_MAX_CHAINS]; + struct ewma chain_signal_avg[IEEE80211_MAX_CHAINS]; + /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; -- cgit v1.2.3 From 55300a13d2ca1d59f659cf00b9d8dc93ea225882 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Tue, 23 Apr 2013 09:54:21 +0300 Subject: cfg80211: add 60GHz regulatory class Add regulatory class for 60GHz band, according to the last specification. 
Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- net/wireless/util.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index f5ad4d94ba8..b11052be09b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1169,6 +1169,9 @@ bool ieee80211_operating_class_to_band(u8 operating_class, case 84: *band = IEEE80211_BAND_2GHZ; return true; + case 180: + *band = IEEE80211_BAND_60GHZ; + return true; } return false; -- cgit v1.2.3 From fb4e156886ce6e8309e912d8b370d192330d19d3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 28 Apr 2013 16:22:06 -0700 Subject: nl80211: Add generic netlink module alias for cfg80211/nl80211 To support auto-loading of wireless modules from netlink users, add module alias for nl80211 family. This also adds NL80211_GENL_NAME constant to define the "nl80211" netlink family name as part of uapi. Signed-off-by: Marcel Holtmann Signed-off-by: Johannes Berg --- net/wireless/core.c | 1 + net/wireless/nl80211.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 84c9ad7e1dc..68f0c96c056 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -34,6 +34,7 @@ MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); +MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME); /* RCU-protected (and cfg80211_mutex for writers) */ LIST_HEAD(cfg80211_rdev_list); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f687a8d0d02..9cdcd9ec331 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -37,10 +37,10 @@ static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = "nl80211", /* have users key off the name instead */ - .hdrsize = 0, /* no private header 
*/ - .version = 1, /* no particular meaning now */ + .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, .netnsok = true, .pre_doit = nl80211_pre_doit, -- cgit v1.2.3 From 04a161f4609dfa387313456fa7ea469fff12cc0d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 May 2013 09:35:35 +0200 Subject: mac80211: fix HT beacon-based channel switch handling When an HT AP is advertising channel switch in a beacon, it doesn't (and shouldn't, according to 802.11-2012 Table 8-20) include a secondary channel offset element. The only possible interpretation is that the previous secondary channel offset remains valid, so use that when switching channel based only on beacon information. VHT requires the Wide Bandwidth Channel Switch subelement to be present in the Channel Switch Wrapper element, so the code for that is probably ok (see 802.11ac Draft 4, 8.4.2.165.) 
Reported-by: Sujith Manoharan Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 29620bfc7a6..a8016c02a75 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1015,7 +1015,8 @@ static void ieee80211_chswitch_timer(unsigned long data) static void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, - u64 timestamp, struct ieee802_11_elems *elems) + u64 timestamp, struct ieee802_11_elems *elems, + bool beacon) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -1032,6 +1033,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; ASSERT_MGD_MTX(ifmgd); @@ -1048,11 +1050,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + ht_oper = elems->ht_operation; if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_40MHZ)) { sec_chan_offs = NULL; wide_bw_chansw_ie = NULL; + /* only used for bandwidth here */ + ht_oper = NULL; } if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) @@ -1094,10 +1099,20 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } - if (sec_chan_offs) { + if (!beacon && sec_chan_offs) { secondary_channel_offset = sec_chan_offs->sec_chan_offs; + } else if (beacon && ht_oper) { + secondary_channel_offset = + ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; } else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { - /* if HT is enabled and the IE not present, it's still HT */ + /* + * If it's not a beacon, HT 
is enabled and the IE not present, + * it's 20 MHz, 802.11-2012 8.5.2.6: + * This element [the Secondary Channel Offset Element] is + * present when switching to a 40 MHz channel. It may be + * present when switching to a 20 MHz channel (in which + * case the secondary channel offset is set to SCN). + */ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; } @@ -2796,7 +2811,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->iflist_mtx); } - ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems); + ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, + elems, true); } @@ -3210,7 +3226,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, - &elems); + &elems, false); } else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { ies_len = skb->len - offsetof(struct ieee80211_mgmt, @@ -3232,7 +3248,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, - &elems); + &elems, false); } break; } -- cgit v1.2.3 From 4325f6caad98c075b39f0eaaac6693a0dd43f646 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 May 2013 13:09:08 +0200 Subject: wireless: move crypto constants to ieee80211.h mac80211 and the Intel drivers all define crypto constants, move them to ieee80211.h instead. 
Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/aes_ccm.c | 6 ++--- net/mac80211/key.c | 24 +++++++++--------- net/mac80211/key.h | 15 ++--------- net/mac80211/rx.c | 12 ++++----- net/mac80211/wep.c | 48 ++++++++++++++++++----------------- net/mac80211/wpa.c | 68 ++++++++++++++++++++++++++------------------------ 6 files changed, 84 insertions(+), 89 deletions(-) (limited to 'net') diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 0785e95c992..be7614b9ed2 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -85,7 +85,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, *cpos++ = *pos++ ^ e[i]; } - for (i = 0; i < CCMP_MIC_LEN; i++) + for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) mic[i] = b[i] ^ s_0[i]; } @@ -123,7 +123,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, crypto_cipher_encrypt_one(tfm, a, a); } - for (i = 0; i < CCMP_MIC_LEN; i++) { + for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) { if ((mic[i] ^ s_0[i]) != a[i]) return -1; } @@ -138,7 +138,7 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]) tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN); + crypto_cipher_setkey(tfm, key, WLAN_KEY_LEN_CCMP); return tfm; } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 67059b88fea..e39cc91d0cf 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -335,12 +335,12 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, switch (cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: - key->conf.iv_len = WEP_IV_LEN; - key->conf.icv_len = WEP_ICV_LEN; + key->conf.iv_len = IEEE80211_WEP_IV_LEN; + key->conf.icv_len = IEEE80211_WEP_ICV_LEN; break; case WLAN_CIPHER_SUITE_TKIP: - key->conf.iv_len = TKIP_IV_LEN; - key->conf.icv_len = TKIP_ICV_LEN; + key->conf.iv_len = IEEE80211_TKIP_IV_LEN; + key->conf.icv_len = 
IEEE80211_TKIP_ICV_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS; i++) { key->u.tkip.rx[i].iv32 = @@ -352,13 +352,13 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, spin_lock_init(&key->u.tkip.txlock); break; case WLAN_CIPHER_SUITE_CCMP: - key->conf.iv_len = CCMP_HDR_LEN; - key->conf.icv_len = CCMP_MIC_LEN; + key->conf.iv_len = IEEE80211_CCMP_HDR_LEN; + key->conf.icv_len = IEEE80211_CCMP_MIC_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) - for (j = 0; j < CCMP_PN_LEN; j++) + for (j = 0; j < IEEE80211_CCMP_PN_LEN; j++) key->u.ccmp.rx_pn[i][j] = - seq[CCMP_PN_LEN - j - 1]; + seq[IEEE80211_CCMP_PN_LEN - j - 1]; } /* * Initialize AES key state here as an optimization so that @@ -375,9 +375,9 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); if (seq) - for (j = 0; j < CMAC_PN_LEN; j++) + for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = - seq[CMAC_PN_LEN - j - 1]; + seq[IEEE80211_CMAC_PN_LEN - j - 1]; /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. 
@@ -740,13 +740,13 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.ccmp.rx_pn[tid]; - memcpy(seq->ccmp.pn, pn, CCMP_PN_LEN); + memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; - memcpy(seq->aes_cmac.pn, pn, CMAC_PN_LEN); + memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; } } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index e8de3e6d780..036d57e76a5 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -19,17 +19,6 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 -#define WEP_IV_LEN 4 -#define WEP_ICV_LEN 4 -#define ALG_CCMP_KEY_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 -#define TKIP_IV_LEN 8 -#define TKIP_ICV_LEN 4 -#define CMAC_PN_LEN 6 - struct ieee80211_local; struct ieee80211_sub_if_data; struct sta_info; @@ -93,13 +82,13 @@ struct ieee80211_key { * frames and the last counter is used with Robust * Management frames. 
*/ - u8 rx_pn[IEEE80211_NUM_TIDS + 1][CCMP_PN_LEN]; + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_CCMP_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCCMPReplays */ } ccmp; struct { atomic64_t tx_pn; - u8 rx_pn[CMAC_PN_LEN]; + u8 rx_pn[IEEE80211_CMAC_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 22e412b0767..6e2c8c5236c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1622,7 +1622,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) entry->ccmp = 1; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], - CCMP_PN_LEN); + IEEE80211_CCMP_PN_LEN); } return RX_QUEUED; } @@ -1641,21 +1641,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * (IEEE 802.11i, 8.3.3.4.5) */ if (entry->ccmp) { int i; - u8 pn[CCMP_PN_LEN], *rpn; + u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) return RX_DROP_UNUSABLE; - memcpy(pn, entry->last_pn, CCMP_PN_LEN); - for (i = CCMP_PN_LEN - 1; i >= 0; i--) { + memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); + for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { pn[i]++; if (pn[i]) break; } queue = rx->security_idx; rpn = rx->key->u.ccmp.rx_pn[queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN)) + if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) return RX_DROP_UNUSABLE; - memcpy(entry->last_pn, pn, CCMP_PN_LEN); + memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); } skb_pull(rx->skb, ieee80211_hdrlen(fc)); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index c04d401dae9..6ee2b586357 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -28,7 +28,7 @@ int ieee80211_wep_init(struct ieee80211_local *local) { /* start WEP IV from a random value */ - get_random_bytes(&local->wep_iv, WEP_IV_LEN); + get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN); local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, 
CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_tx_tfm)) { @@ -98,20 +98,21 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN || - skb_headroom(skb) < WEP_IV_LEN)) + if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN || + skb_headroom(skb) < IEEE80211_WEP_IV_LEN)) return NULL; hdrlen = ieee80211_hdrlen(hdr->frame_control); - newhdr = skb_push(skb, WEP_IV_LEN); - memmove(newhdr, newhdr + WEP_IV_LEN, hdrlen); + newhdr = skb_push(skb, IEEE80211_WEP_IV_LEN); + memmove(newhdr, newhdr + IEEE80211_WEP_IV_LEN, hdrlen); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return newhdr + hdrlen; - skb_set_network_header(skb, skb_network_offset(skb) + WEP_IV_LEN); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_WEP_IV_LEN); ieee80211_wep_get_iv(local, keylen, keyidx, newhdr + hdrlen); return newhdr + hdrlen; } @@ -125,8 +126,8 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local, unsigned int hdrlen; hdrlen = ieee80211_hdrlen(hdr->frame_control); - memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, WEP_IV_LEN); + memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_WEP_IV_LEN); } @@ -146,7 +147,7 @@ int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key, put_unaligned(icv, (__le32 *)(data + data_len)); crypto_cipher_setkey(tfm, rc4key, klen); - for (i = 0; i < data_len + WEP_ICV_LEN; i++) + for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++) crypto_cipher_encrypt_one(tfm, data + i, data + i); return 0; @@ -172,7 +173,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, if (!iv) return -1; - len = skb->len - (iv + WEP_IV_LEN - skb->data); + len = skb->len - (iv + IEEE80211_WEP_IV_LEN - skb->data); /* Prepend 24-bit IV to RC4 key */ memcpy(rc4key, 
iv, 3); @@ -181,10 +182,10 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, memcpy(rc4key + 3, key, keylen); /* Add room for ICV */ - skb_put(skb, WEP_ICV_LEN); + skb_put(skb, IEEE80211_WEP_ICV_LEN); return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, - iv + WEP_IV_LEN, len); + iv + IEEE80211_WEP_IV_LEN, len); } @@ -201,11 +202,11 @@ int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key, return -1; crypto_cipher_setkey(tfm, rc4key, klen); - for (i = 0; i < data_len + WEP_ICV_LEN; i++) + for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++) crypto_cipher_decrypt_one(tfm, data + i, data + i); crc = cpu_to_le32(~crc32_le(~0, data, data_len)); - if (memcmp(&crc, data + data_len, WEP_ICV_LEN) != 0) + if (memcmp(&crc, data + data_len, IEEE80211_WEP_ICV_LEN) != 0) /* ICV mismatch */ return -1; @@ -237,10 +238,10 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, return -1; hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (skb->len < hdrlen + WEP_IV_LEN + WEP_ICV_LEN) + if (skb->len < hdrlen + IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN) return -1; - len = skb->len - hdrlen - WEP_IV_LEN - WEP_ICV_LEN; + len = skb->len - hdrlen - IEEE80211_WEP_IV_LEN - IEEE80211_WEP_ICV_LEN; keyidx = skb->data[hdrlen + 3] >> 6; @@ -256,16 +257,16 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, memcpy(rc4key + 3, key->conf.key, key->conf.keylen); if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, - skb->data + hdrlen + WEP_IV_LEN, - len)) + skb->data + hdrlen + + IEEE80211_WEP_IV_LEN, len)) ret = -1; /* Trim ICV */ - skb_trim(skb, skb->len - WEP_ICV_LEN); + skb_trim(skb, skb->len - IEEE80211_WEP_ICV_LEN); /* Remove IV */ - memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, WEP_IV_LEN); + memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_WEP_IV_LEN); return ret; } @@ -305,13 +306,14 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data 
*rx) if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { - if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + WEP_IV_LEN)) + if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + + IEEE80211_WEP_IV_LEN)) return RX_DROP_UNUSABLE; if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) rx->sta->wep_weak_iv_count++; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - if (pskb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN)) + if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) return RX_DROP_UNUSABLE; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index c7c6d644486..c9edfcb7a13 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -62,10 +62,10 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) tail = MICHAEL_MIC_LEN; if (!info->control.hw_key) - tail += TKIP_ICV_LEN; + tail += IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < TKIP_IV_LEN)) + skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return TX_DROP; key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; @@ -198,15 +198,16 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = TKIP_ICV_LEN; + tail = IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < TKIP_IV_LEN)) + skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return -1; - pos = skb_push(skb, TKIP_IV_LEN); - memmove(pos, pos + TKIP_IV_LEN, hdrlen); - skb_set_network_header(skb, skb_network_offset(skb) + TKIP_IV_LEN); + pos = skb_push(skb, IEEE80211_TKIP_IV_LEN); + memmove(pos, pos + IEEE80211_TKIP_IV_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_TKIP_IV_LEN); pos += hdrlen; /* the HW only needs room for the IV, but not the actual IV */ @@ -227,7 +228,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; /* Add 
room for ICV */ - skb_put(skb, TKIP_ICV_LEN); + skb_put(skb, IEEE80211_TKIP_ICV_LEN); return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, key, skb, pos, len); @@ -290,11 +291,11 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* Trim ICV */ - skb_trim(skb, skb->len - TKIP_ICV_LEN); + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); /* Remove IV */ - memmove(skb->data + TKIP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, TKIP_IV_LEN); + memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_TKIP_IV_LEN); return RX_CONTINUE; } @@ -337,9 +338,9 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, else qos_tid = 0; - data_len = skb->len - hdrlen - CCMP_HDR_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN; if (encrypted) - data_len -= CCMP_MIC_LEN; + data_len -= IEEE80211_CCMP_MIC_LEN; /* First block, b_0 */ b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ @@ -348,7 +349,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, */ b_0[1] = qos_tid | (mgmt << 4); memcpy(&b_0[2], hdr->addr2, ETH_ALEN); - memcpy(&b_0[8], pn, CCMP_PN_LEN); + memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); /* l(m) */ put_unaligned_be16(data_len, &b_0[14]); @@ -424,15 +425,16 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = CCMP_MIC_LEN; + tail = IEEE80211_CCMP_MIC_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < CCMP_HDR_LEN)) + skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN)) return -1; - pos = skb_push(skb, CCMP_HDR_LEN); - memmove(pos, pos + CCMP_HDR_LEN, hdrlen); - skb_set_network_header(skb, skb_network_offset(skb) + CCMP_HDR_LEN); + pos = skb_push(skb, IEEE80211_CCMP_HDR_LEN); + memmove(pos, pos + IEEE80211_CCMP_HDR_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_CCMP_HDR_LEN); /* the HW only needs room for the IV, but not 
the actual IV */ if (info->control.hw_key && @@ -457,10 +459,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) return 0; - pos += CCMP_HDR_LEN; + pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, scratch, 0); ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len, - pos, skb_put(skb, CCMP_MIC_LEN)); + pos, skb_put(skb, IEEE80211_CCMP_MIC_LEN)); return 0; } @@ -490,7 +492,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - u8 pn[CCMP_PN_LEN]; + u8 pn[IEEE80211_CCMP_PN_LEN]; int data_len; int queue; @@ -500,12 +502,13 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) !ieee80211_is_robust_mgmt_frame(hdr)) return RX_CONTINUE; - data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - + IEEE80211_CCMP_MIC_LEN; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; if (status->flag & RX_FLAG_DECRYPTED) { - if (!pskb_may_pull(rx->skb, hdrlen + CCMP_HDR_LEN)) + if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; } else { if (skb_linearize(rx->skb)) @@ -516,7 +519,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) queue = rx->security_idx; - if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) { + if (memcmp(pn, key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN) <= 0) { key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } @@ -528,19 +531,20 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_aes_ccm_decrypt( key->u.ccmp.tfm, scratch, - skb->data + hdrlen + CCMP_HDR_LEN, data_len, - skb->data + skb->len - CCMP_MIC_LEN, - skb->data + hdrlen + CCMP_HDR_LEN)) + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, + data_len, + skb->data + skb->len - IEEE80211_CCMP_MIC_LEN, + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN)) 
return RX_DROP_UNUSABLE; } - memcpy(key->u.ccmp.rx_pn[queue], pn, CCMP_PN_LEN); + memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); /* Remove CCMP header and MIC */ - if (pskb_trim(skb, skb->len - CCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - IEEE80211_CCMP_MIC_LEN)) return RX_DROP_UNUSABLE; - memmove(skb->data + CCMP_HDR_LEN, skb->data, hdrlen); - skb_pull(skb, CCMP_HDR_LEN); + memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_CCMP_HDR_LEN); return RX_CONTINUE; } -- cgit v1.2.3 From bd500af223c9aed7083730b7044d53162065e418 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2013 21:09:46 +0200 Subject: mac80211: write memcpy differently for smatch There's no real difference between *array and array, but the former confuses smatch so write it differently. The generated code is exactly the same. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1f51bdfe574..66989458f5f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -739,7 +739,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy, if (sset == ETH_SS_STATS) { sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats); - memcpy(data, *ieee80211_gstrings_sta_stats, sz_sta_stats); + memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats); } drv_get_et_strings(sdata, sset, &(data[sz_sta_stats])); } -- cgit v1.2.3 From 7ade7036043e2e8e2831ae189ce5c248386062f1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 May 2013 11:37:30 +0200 Subject: cfg80211: use C99 initialisers to simplify code a bit Use C99 initialisers for the auth, deauth and disassoc requests to simplify the code. 
Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 0c7b7dd855f..c21e32f9549 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -264,7 +264,16 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *sae_data, int sae_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_auth_request req; + struct cfg80211_auth_request req = { + .ie = ie, + .ie_len = ie_len, + .sae_data = sae_data, + .sae_data_len = sae_data_len, + .auth_type = auth_type, + .key = key, + .key_len = key_len, + .key_idx = key_idx, + }; int err; ASSERT_WDEV_LOCK(wdev); @@ -277,18 +286,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ether_addr_equal(bssid, wdev->current_bss->pub.bssid)) return -EALREADY; - memset(&req, 0, sizeof(req)); - - req.ie = ie; - req.ie_len = ie_len; - req.sae_data = sae_data; - req.sae_data_len = sae_data_len; - req.auth_type = auth_type; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - req.key = key; - req.key_len = key_len; - req.key_idx = key_idx; if (!req.bss) return -ENOENT; @@ -480,7 +479,12 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_disassoc_request req; + struct cfg80211_disassoc_request req = { + .reason_code = reason, + .local_state_change = local_state_change, + .ie = ie, + .ie_len = ie_len, + }; ASSERT_WDEV_LOCK(wdev); @@ -490,11 +494,6 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) return -ENOTCONN; - memset(&req, 0, sizeof(req)); - req.reason_code = reason; - req.local_state_change = local_state_change; - req.ie = ie; 
- req.ie_len = ie_len; if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) req.bss = &wdev->current_bss->pub; else @@ -523,24 +522,21 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_deauth_request req; u8 bssid[ETH_ALEN]; + struct cfg80211_deauth_request req = { + .reason_code = WLAN_REASON_DEAUTH_LEAVING, + .bssid = bssid, + }; ASSERT_WDEV_LOCK(wdev); if (!rdev->ops->deauth) return; - memset(&req, 0, sizeof(req)); - req.reason_code = WLAN_REASON_DEAUTH_LEAVING; - req.ie = NULL; - req.ie_len = 0; - if (!wdev->current_bss) return; memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); - req.bssid = bssid; rdev_deauth(rdev, dev, &req); if (wdev->current_bss) { -- cgit v1.2.3 From 6e16d90b5218307db805e6b3e0b06d3946eb8c4c Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Wed, 8 May 2013 11:45:59 -0700 Subject: cfg80211: Userspace may inform kernel of mesh auth method. Authentication takes place in userspace, but the beacon is generated in the kernel. Allow userspace to inform the kernel of the authentication method so the appropriate mesh config IE can be set prior to beacon generation when joining the MBSS. 
Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- net/wireless/mesh.c | 1 + net/wireless/nl80211.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'net') diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0bb93f3061a..9546ad21055 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -82,6 +82,7 @@ const struct mesh_setup default_mesh_setup = { .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, + .auth_id = 0, /* open */ .ie = NULL, .ie_len = 0, .is_secure = false, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9cdcd9ec331..5f10f7acfa0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4672,6 +4672,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, @@ -4857,6 +4858,13 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, if (setup->is_secure) setup->user_mpm = true; + if (tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]) { + if (!setup->user_mpm) + return -EINVAL; + setup->auth_id = + nla_get_u8(tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]); + } + return 0; } -- cgit v1.2.3 From 0d4261ad5d0028b26cd88e645b4507eed8aab3f7 Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Wed, 8 May 2013 11:46:00 -0700 Subject: mac80211: enable Auth Protocol Identifier on mesh config. Previously the mesh_auth_id was disabled. Instead set the correct mesh authentication bit based on the mesh setup. 
Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 1 + net/mac80211/mesh.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 66989458f5f..eb421905104 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1746,6 +1746,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; ifmsh->user_mpm = setup->user_mpm; + ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6952760881c..c13db9ad394 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -748,7 +748,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_configure_filter(local); ifmsh->mesh_cc_id = 0; /* Disabled */ - ifmsh->mesh_auth_id = 0; /* Disabled */ /* register sync ops from extensible synchronization framework */ ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id); ifmsh->adjusting_tbtt = false; -- cgit v1.2.3 From ce85788846ec19dcb7bef0dcbcf83fb64630f426 Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Mon, 6 May 2013 17:17:04 +0300 Subject: mac80211: enable power save only if DTIM period is available Generally, the DTIM period is available after a beacon has been received, and if no beacon has been received enabling powersave is problematic anyway for synchronisation. Since some drivers may require the DTIM period for powersave, don't enable powersave until it becomes available in case the scan/association managed to not receive a beacon. 
Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a8016c02a75..ef378b9a32e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1390,6 +1390,9 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL)) return false; + if (!sdata->vif.bss_conf.dtim_period) + return false; + rcu_read_lock(); sta = sta_info_get(sdata, mgd->bssid); if (sta) @@ -3126,6 +3129,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } changed |= BSS_CHANGED_DTIM_PERIOD; + ieee80211_recalc_ps_vif(sdata); } if (elems.erp_info) { -- cgit v1.2.3 From d2cf43674e17ca1c16c68d46d987d2f17bf7c371 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 May 2013 19:25:55 +0000 Subject: tcp: speedup tcp_fixup_rcvbuf() tcp_fixup_rcvbuf() contains a loop to estimate initial socket rcv space needed for a given mss. With large MTU (like 64K on lo), we can loop ~500 times and consume a lot of cpu cycles. perf top of 200 concurrent netperf -t TCP_CRR 5.62% netperf [kernel.kallsyms] [k] tcp_init_buffer_space 1.71% netperf [kernel.kallsyms] [k] _raw_spin_lock 1.55% netperf [kernel.kallsyms] [k] kmem_cache_free 1.51% netperf [kernel.kallsyms] [k] tcp_transmit_skb 1.50% netperf [kernel.kallsyms] [k] tcp_ack Lets use a 100% factor, and remove the loop. 100% is needed anyway for tcp_adv_win_scale=1 default value, and is also the maximum factor. Refs: commit b49960a05e32 ("tcp: change tcp_adv_win_scale and tcp_rmem[2]") Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 08bbe609652..b358e8c9860 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -360,9 +360,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) if (mss > 1460) icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER); - while (tcp_win_from_space(rcvmem) < mss) - rcvmem += 128; + rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER); rcvmem *= icwnd; -- cgit v1.2.3 From 57b354e66b67c4c72468a26d4313d1217ef32e17 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 16 May 2013 23:36:32 +0000 Subject: dev: remove duplicate 'skb->dev = dev' in dev_forward_skb() This was added by commit 59b9997baba5 (Revert "net: maintain namespace isolation between vlan and real device"). In fact, before the initial commit - the one that is reverted -, this statement was not present. 'skb->dev = dev' is already done in eth_type_trans(), which is call just after. Spotted-by: Alain Ritoux Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fc1e289397f..18e9730cc4b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1629,7 +1629,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) return NET_RX_DROP; } skb->skb_iif = 0; - skb->dev = dev; skb_dst_drop(skb); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; -- cgit v1.2.3 From caeaba79009c2ee858c3b2bf8caf922cd719fead Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 16 May 2013 22:32:00 +0000 Subject: ipv6: add support of peer address This patch adds the support of peer address for IPv6. For example, it is possible to specify the remote end of a 6inY tunnel. 
This was already possible in IPv4: ip addr add ip1 peer ip2 dev dev1 The peer address is specified with IFA_ADDRESS and the local address with IFA_LOCAL (like explained in include/uapi/linux/if_addr.h). Note that the API is not changed, because before this patch, it was not possible to specify two different addresses in IFA_LOCAL and IFA_REMOTE. There is a small change for the dump: if the peer is different from ::, IFA_ADDRESS will contain the peer address instead of the local address. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 64 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1ab6ab29a5..d684d23bc02 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2402,6 +2402,7 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, + const struct in6_addr *peer_pfx, unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { @@ -2457,6 +2458,8 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = jiffies; + if (peer_pfx) + ifp->peer_addr = *peer_pfx; spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, @@ -2526,7 +2529,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL, ireq.ifr6_prefixlen, IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); @@ -3610,18 +3613,20 @@ restart: rcu_read_unlock_bh(); } -static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) +static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr 
*local, + struct in6_addr **peer_pfx) { struct in6_addr *pfx = NULL; + *peer_pfx = NULL; + if (addr) pfx = nla_data(addr); if (local) { if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) - pfx = NULL; - else - pfx = nla_data(local); + *peer_pfx = pfx; + pfx = nla_data(local); } return pfx; @@ -3639,7 +3644,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; + struct in6_addr *pfx, *peer_pfx; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3647,7 +3652,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (pfx == NULL) return -EINVAL; @@ -3705,7 +3710,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; + struct in6_addr *pfx, *peer_pfx; struct inet6_ifaddr *ifa; struct net_device *dev; u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; @@ -3717,7 +3722,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (pfx == NULL) return -EINVAL; @@ -3745,7 +3750,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) * It would be best to check for !NLM_F_CREATE here but * userspace alreay relies on not having to provide this. 
*/ - return inet6_addr_add(net, ifm->ifa_index, pfx, + return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, ifm->ifa_prefixlen, ifa_flags, preferred_lft, valid_lft); } @@ -3802,6 +3807,7 @@ static inline int rt_scope(int ifa_scope) static inline int inet6_ifaddr_msgsize(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16) /* IFA_LOCAL */ + nla_total_size(16) /* IFA_ADDRESS */ + nla_total_size(sizeof(struct ifa_cacheinfo)); } @@ -3840,13 +3846,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || - put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } + if (ipv6_addr_type(&ifa->peer_addr) != IPV6_ADDR_ANY) { + if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || + nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) + goto error; + } else + if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0) + goto error; + + if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + goto error; return nlmsg_end(skb, nlh); + +error: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, @@ -4046,7 +4061,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *addr = NULL; + struct in6_addr *addr = NULL, *peer; struct net_device *dev = NULL; struct inet6_ifaddr *ifa; struct sk_buff *skb; @@ -4056,7 +4071,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); if (addr == NULL) { err = -EINVAL; goto errout; @@ -4564,11 +4579,26 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) 
ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); + if (ipv6_addr_type(&ifp->peer_addr) != IPV6_ADDR_ANY) + addrconf_prefix_route(&ifp->peer_addr, 128, + ifp->idev->dev, 0, 0); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); + if (ipv6_addr_type(&ifp->peer_addr) != IPV6_ADDR_ANY) { + struct rt6_info *rt; + struct net_device *dev = ifp->idev->dev; + + rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL, + dev->ifindex, 1); + if (rt) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } + } dst_hold(&ifp->rt->dst); if (ip6_del_rt(ifp->rt)) -- cgit v1.2.3 From 3e59cb0ddfd2c59991f38e89352ad8a3c71b2374 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 17 May 2013 13:45:05 +0000 Subject: tcp: remove bad timeout logic in fast recovery tcp_timeout_skb() was intended to trigger fast recovery on timeout, unfortunately in reality it often causes spurious retransmission storms during fast recovery. The particular sign is a fast retransmit over the highest sacked sequence (SND.FACK). Currently the RTO timer re-arming (as in RFC6298) offers a nice cushion to avoid spurious timeout: when SND.UNA advances the sender re-arms RTO and extends the timeout by icsk_rto. The sender does not offset the time elapsed since the packet at SND.UNA was sent. But if the next (DUP)ACK arrives later than ~RTTVAR and triggers tcp_fastretrans_alert(), then tcp_timeout_skb() will mark any packet sent before the icsk_rto interval lost, including one that's above the highest sacked sequence. Most likely a large part of scorebard will be marked. If most packets are not lost then the subsequent DUPACKs with new SACK blocks will cause the sender to continue to retransmit packets beyond SND.FACK spuriously. Even if only one packet is lost the sender may falsely retransmit almost the entire window. 
The situation becomes common in the world of bufferbloat: the RTT continues to grow as the queue builds up but RTTVAR remains small and close to the minimum 200ms. If a data packet is lost and the DUPACK triggered by the next data packet is slightly delayed, then a spurious retransmission storm forms. As the original comment on tcp_timeout_skb() suggests: the usefulness of this feature is questionable. It also wastes cycles walking the sack scoreboard and is actually harmful because of false recovery. It's time to remove this. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 65 +--------------------------------------------------- 1 file changed, 1 insertion(+), 64 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b358e8c9860..d7d369428ae 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1255,8 +1255,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = prev; - if (skb == tp->scoreboard_skb_hint) - tp->scoreboard_skb_hint = prev; if (skb == tp->lost_skb_hint) { tp->lost_skb_hint = prev; tp->lost_cnt_hint -= tcp_skb_pcount(prev); @@ -1964,20 +1962,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) return true; } -static inline int tcp_skb_timedout(const struct sock *sk, - const struct sk_buff *skb) -{ - return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; -} - -static inline int tcp_head_timedout(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - - return tp->packets_out && - tcp_skb_timedout(sk, tcp_write_queue_head(sk)); -} - /* Linux NewReno/SACK/FACK/ECN state machine. 
* -------------------------------------- * @@ -2084,12 +2068,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) if (tcp_dupack_heuristics(tp) > tp->reordering) return true; - /* Trick#3 : when we use RFC2988 timer restart, fast - * retransmit can be triggered by timeout of queue head. - */ - if (tcp_is_fack(tp) && tcp_head_timedout(sk)) - return true; - /* Trick#4: It is still not OK... But will it be useful to delay * recovery more? */ @@ -2126,44 +2104,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) return false; } -/* New heuristics: it is possible only after we switched to restart timer - * each time when something is ACKed. Hence, we can detect timed out packets - * during fast retransmit without falling to slow start. - * - * Usefulness of this as is very questionable, since we should know which of - * the segments is the next to timeout which is relatively expensive to find - * in general case unless we add some data structure just for that. The - * current approach certainly won't find the right one too often and when it - * finally does find _something_ it usually marks large part of the window - * right away (because a retransmission with a larger timestamp blocks the - * loop from advancing). -ij - */ -static void tcp_timeout_skbs(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if (!tcp_is_fack(tp) || !tcp_head_timedout(sk)) - return; - - skb = tp->scoreboard_skb_hint; - if (tp->scoreboard_skb_hint == NULL) - skb = tcp_write_queue_head(sk); - - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (!tcp_skb_timedout(sk, skb)) - break; - - tcp_skb_mark_lost(tp, skb); - } - - tp->scoreboard_skb_hint = skb; - - tcp_verify_left_out(tp); -} - /* Detect loss in event "A" above by marking head of queue up as lost. * For FACK or non-SACK(Reno) senders, the first "packets" number of segments * are considered lost. 
For RFC3517 SACK, a segment is considered lost if it @@ -2249,8 +2189,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) else if (fast_rexmit) tcp_mark_head_lost(sk, 1, 1); } - - tcp_timeout_skbs(sk); } /* CWND moderation, preventing bursts due to too big ACKs @@ -2842,7 +2780,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, fast_rexmit = 1; } - if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) + if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); @@ -3075,7 +3013,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - tp->scoreboard_skb_hint = NULL; if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = NULL; if (skb == tp->lost_skb_hint) -- cgit v1.2.3 From 99bbc70741903c063b3ccad90a3e06fc55df9245 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 May 2013 04:02:32 +0000 Subject: rps: selective flow shedding during softnet overflow A cpu executing the network receive path sheds packets when its input queue grows to netdev_max_backlog. A single high rate flow (such as a spoofed source DoS) can exceed a single cpu processing rate and will degrade throughput of other flows hashed onto the same cpu. This patch adds a more fine grained hashtable. If the netdev backlog is above a threshold, IRQ cpus track the ratio of total traffic of each flow (using 4096 buckets, configurable). The ratio is measured by counting the number of packets per flow over the last 256 packets from the source cpu. Any flow that occupies a large fraction of this (set at 50%) will see packet drop while above the threshold. Tested: Setup is a muli-threaded UDP echo server with network rx IRQ on cpu0, kernel receive (RPS) on cpu0 and application threads on cpus 2--7 each handling 20k req/s. Throughput halves when hit with a 400 kpps antagonist storm. 
With this patch applied, antagonist overload is dropped and the server processes its complete load. The patch is effective when kernel receive processing is the bottleneck. The above RPS scenario is a extreme, but the same is reached with RFS and sufficient kernel processing (iptables, packet socket tap, ..). Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/Kconfig | 12 ++++++ net/core/dev.c | 48 ++++++++++++++++++++- net/core/net-procfs.c | 16 ++++++- net/core/sysctl_net_core.c | 104 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 2ddc9046868..08de901415e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -259,6 +259,18 @@ config BPF_JIT packet sniffing (libpcap/tcpdump). Note : Admin should enable this feature changing /proc/sys/net/core/bpf_jit_enable +config NET_FLOW_LIMIT + boolean + depends on RPS + default y + ---help--- + The network stack has to drop packets when a receive processing CPU's + backlog reaches netdev_max_backlog. If a few out of many active flows + generate the vast majority of load, drop their traffic earlier to + maintain capacity for the other flows. This feature provides servers + with many clients some protection against DoS by a single (spoofed) + flow that greatly exceeds average workload. 
+ menu "Network testing" config NET_PKTGEN diff --git a/net/core/dev.c b/net/core/dev.c index 18e9730cc4b..7229bc30e50 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3064,6 +3064,46 @@ static int rps_ipi_queued(struct softnet_data *sd) return 0; } +#ifdef CONFIG_NET_FLOW_LIMIT +int netdev_flow_limit_table_len __read_mostly = (1 << 12); +#endif + +static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) +{ +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit *fl; + struct softnet_data *sd; + unsigned int old_flow, new_flow; + + if (qlen < (netdev_max_backlog >> 1)) + return false; + + sd = &__get_cpu_var(softnet_data); + + rcu_read_lock(); + fl = rcu_dereference(sd->flow_limit); + if (fl) { + new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + old_flow = fl->history[fl->history_head]; + fl->history[fl->history_head] = new_flow; + + fl->history_head++; + fl->history_head &= FLOW_LIMIT_HISTORY - 1; + + if (likely(fl->buckets[old_flow])) + fl->buckets[old_flow]--; + + if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { + fl->count++; + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); +#endif + return false; +} + /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog * queue (may be a remote CPU queue). 
@@ -3073,13 +3113,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, { struct softnet_data *sd; unsigned long flags; + unsigned int qlen; sd = &per_cpu(softnet_data, cpu); local_irq_save(flags); rps_lock(sd); - if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { + qlen = skb_queue_len(&sd->input_pkt_queue); + if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (skb_queue_len(&sd->input_pkt_queue)) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); @@ -6269,6 +6311,10 @@ static int __init net_dev_init(void) sd->backlog.weight = weight_p; sd->backlog.gro_list = NULL; sd->backlog.gro_count = 0; + +#ifdef CONFIG_NET_FLOW_LIMIT + sd->flow_limit = NULL; +#endif } dev_boot_phase = 0; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 569d355fec3..2bf83299600 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; + unsigned int flow_limit_count = 0; - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit *fl; + + rcu_read_lock(); + fl = rcu_dereference(sd->flow_limit); + if (fl) + flow_limit_count = fl->count; + rcu_read_unlock(); +#endif + + seq_printf(seq, + "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); + sd->cpu_collision, sd->received_rps, flow_limit_count); return 0; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cfdb46ab3a7..741db5fc780 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -87,6 +87,96 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, } #endif /* CONFIG_RPS */ +#ifdef CONFIG_NET_FLOW_LIMIT +static DEFINE_MUTEX(flow_limit_update_mutex); + +static 
int flow_limit_cpu_sysctl(ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct sd_flow_limit *cur; + struct softnet_data *sd; + cpumask_var_t mask; + int i, len, ret = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + if (write) { + ret = cpumask_parse_user(buffer, *lenp, mask); + if (ret) + goto done; + + mutex_lock(&flow_limit_update_mutex); + len = sizeof(*cur) + netdev_flow_limit_table_len; + for_each_possible_cpu(i) { + sd = &per_cpu(softnet_data, i); + cur = rcu_dereference_protected(sd->flow_limit, + lockdep_is_held(&flow_limit_update_mutex)); + if (cur && !cpumask_test_cpu(i, mask)) { + RCU_INIT_POINTER(sd->flow_limit, NULL); + synchronize_rcu(); + kfree(cur); + } else if (!cur && cpumask_test_cpu(i, mask)) { + cur = kzalloc(len, GFP_KERNEL); + if (!cur) { + /* not unwinding previous changes */ + ret = -ENOMEM; + goto write_unlock; + } + cur->num_buckets = netdev_flow_limit_table_len; + rcu_assign_pointer(sd->flow_limit, cur); + } + } +write_unlock: + mutex_unlock(&flow_limit_update_mutex); + } else { + if (*ppos || !*lenp) { + *lenp = 0; + goto done; + } + + cpumask_clear(mask); + rcu_read_lock(); + for_each_possible_cpu(i) { + sd = &per_cpu(softnet_data, i); + if (rcu_dereference(sd->flow_limit)) + cpumask_set_cpu(i, mask); + } + rcu_read_unlock(); + + len = cpumask_scnprintf(buffer, *lenp, mask); + *lenp = len + 1; + *ppos += len + 1; + } + +done: + free_cpumask_var(mask); + return ret; +} + +static int flow_limit_table_len_sysctl(ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + unsigned int old, *ptr; + int ret; + + mutex_lock(&flow_limit_update_mutex); + + ptr = table->data; + old = *ptr; + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (!ret && write && !is_power_of_2(*ptr)) { + *ptr = old; + ret = -EINVAL; + } + + mutex_unlock(&flow_limit_update_mutex); + return ret; +} +#endif /* CONFIG_NET_FLOW_LIMIT */ + static struct ctl_table 
net_core_table[] = { #ifdef CONFIG_NET { @@ -180,6 +270,20 @@ static struct ctl_table net_core_table[] = { .proc_handler = rps_sock_flow_sysctl }, #endif +#ifdef CONFIG_NET_FLOW_LIMIT + { + .procname = "flow_limit_cpu_bitmap", + .mode = 0644, + .proc_handler = flow_limit_cpu_sysctl + }, + { + .procname = "flow_limit_table_len", + .data = &netdev_flow_limit_table_len, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = flow_limit_table_len_sysctl + }, +#endif /* CONFIG_NET_FLOW_LIMIT */ #endif /* CONFIG_NET */ { .procname = "netdev_budget", -- cgit v1.2.3 From 71cea17ed39fdf1c0634f530ddc6a2c2fc601c2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 May 2013 06:52:26 +0000 Subject: tcp: md5: remove spinlock usage in fast path TCP md5 code uses per cpu variables but protects access to them with a shared spinlock, which is a contention point. [ tcp_md5sig_pool_lock is locked twice per incoming packet ] Makes things much simpler, by allocating crypto structures once, first time a socket needs md5 keys, and not deallocating them as they are really small. Next step would be to allow crypto allocations being done in a NUMA aware way. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 98 +++++++++++------------------------------------- net/ipv4/tcp_ipv4.c | 10 +---- net/ipv4/tcp_minisocks.c | 6 +-- 3 files changed, 24 insertions(+), 90 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dcb116dde21..53d9c120fbb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3095,9 +3095,8 @@ int tcp_gro_complete(struct sk_buff *skb) EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG -static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; -static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_MUTEX(tcp_md5sig_mutex); static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { @@ -3112,30 +3111,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) free_percpu(pool); } -void tcp_free_md5sig_pool(void) -{ - struct tcp_md5sig_pool __percpu *pool = NULL; - - spin_lock_bh(&tcp_md5sig_pool_lock); - if (--tcp_md5sig_users == 0) { - pool = tcp_md5sig_pool; - tcp_md5sig_pool = NULL; - } - spin_unlock_bh(&tcp_md5sig_pool_lock); - if (pool) - __tcp_free_md5sig_pool(pool); -} -EXPORT_SYMBOL(tcp_free_md5sig_pool); - -static struct tcp_md5sig_pool __percpu * -__tcp_alloc_md5sig_pool(struct sock *sk) +static void __tcp_alloc_md5sig_pool(void) { int cpu; struct tcp_md5sig_pool __percpu *pool; pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) - return NULL; + return; for_each_possible_cpu(cpu) { struct crypto_hash *hash; @@ -3146,53 +3129,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk) per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } - return pool; + /* before setting tcp_md5sig_pool, we must commit all writes + * to memory. 
See ACCESS_ONCE() in tcp_get_md5sig_pool() + */ + smp_wmb(); + tcp_md5sig_pool = pool; + return; out_free: __tcp_free_md5sig_pool(pool); - return NULL; } -struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +bool tcp_alloc_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *pool; - bool alloc = false; - -retry: - spin_lock_bh(&tcp_md5sig_pool_lock); - pool = tcp_md5sig_pool; - if (tcp_md5sig_users++ == 0) { - alloc = true; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } else if (!pool) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - cpu_relax(); - goto retry; - } else - spin_unlock_bh(&tcp_md5sig_pool_lock); - - if (alloc) { - /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool __percpu *p; - - p = __tcp_alloc_md5sig_pool(sk); - spin_lock_bh(&tcp_md5sig_pool_lock); - if (!p) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - return NULL; - } - pool = tcp_md5sig_pool; - if (pool) { - /* oops, it has already been assigned. 
*/ - spin_unlock_bh(&tcp_md5sig_pool_lock); - __tcp_free_md5sig_pool(p); - } else { - tcp_md5sig_pool = pool = p; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } + if (unlikely(!tcp_md5sig_pool)) { + mutex_lock(&tcp_md5sig_mutex); + + if (!tcp_md5sig_pool) + __tcp_alloc_md5sig_pool(); + + mutex_unlock(&tcp_md5sig_mutex); } - return pool; + return tcp_md5sig_pool != NULL; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -3209,28 +3166,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) struct tcp_md5sig_pool __percpu *p; local_bh_disable(); - - spin_lock(&tcp_md5sig_pool_lock); - p = tcp_md5sig_pool; - if (p) - tcp_md5sig_users++; - spin_unlock(&tcp_md5sig_pool_lock); - + p = ACCESS_ONCE(tcp_md5sig_pool); if (p) - return this_cpu_ptr(p); + return __this_cpu_ptr(p); local_bh_enable(); return NULL; } EXPORT_SYMBOL(tcp_get_md5sig_pool); -void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); - tcp_free_md5sig_pool(); -} -EXPORT_SYMBOL(tcp_put_md5sig_pool); - int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, const struct tcphdr *th) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 719652305a2..d20ede0c959 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1026,7 +1026,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = sock_kmalloc(sk, sizeof(*key), gfp); if (!key) return -ENOMEM; - if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { + if (!tcp_alloc_md5sig_pool()) { sock_kfree_s(sk, key, sizeof(*key)); return -ENOMEM; } @@ -1044,9 +1044,7 @@ EXPORT_SYMBOL(tcp_md5_do_add); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { - struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (!key) @@ -1054,10 +1052,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 
kfree_rcu(key, rcu); - md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); - if (hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); return 0; } EXPORT_SYMBOL(tcp_md5_do_del); @@ -1071,8 +1065,6 @@ static void tcp_clear_md5_list(struct sock *sk) md5sig = rcu_dereference_protected(tp->md5sig_info, 1); - if (!hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0f017882725..ab1c0865852 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -317,7 +317,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) key = tp->af_specific->md5_lookup(sk, sk); if (key != NULL) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL) + if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) BUG(); } } while (0); @@ -358,10 +358,8 @@ void tcp_twsk_destructor(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) { - tcp_free_md5sig_pool(); + if (twsk->tw_md5_key) kfree_rcu(twsk->tw_md5_key, rcu); - } #endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); -- cgit v1.2.3 From b6040f9706c4c81cc50b50855ed70840f022bebb Mon Sep 17 00:00:00 2001 From: chaoting fan Date: Thu, 28 Mar 2013 22:19:45 +0800 Subject: sunrpc: the cache_detail in cache_is_valid is unused any more The cache_detail(*detail) in function cache_is_valid is not used any more. Signed-off-by: fanchaoting Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 80fe5c86efd..3b3f14fc02c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -201,7 +201,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h); } -static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) +static inline int cache_is_valid(struct cache_head *h) { if (!test_bit(CACHE_VALID, &h->flags)) return -EAGAIN; @@ -227,7 +227,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h int rv; write_lock(&detail->hash_lock); - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); if (rv != -EAGAIN) { write_unlock(&detail->hash_lock); return rv; @@ -260,7 +260,7 @@ int cache_check(struct cache_detail *detail, long refresh_age, age; /* First decide return status as best we can */ - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); @@ -293,7 +293,7 @@ int cache_check(struct cache_detail *detail, * Request was not deferred; handle it as best * we can ourselves: */ - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); if (rv == -EAGAIN) rv = -ETIMEDOUT; } -- cgit v1.2.3 From 1c8ad5bfa2be5025b0c81e3c2decd0574d453ab1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 21 May 2013 21:52:54 +0000 Subject: bridge: use the bridge IP addr as source addr for querier Quote from Adam: "If it is believed that the use of 0.0.0.0 as the IP address is what is causing strange behaviour on other devices then is there a good reason that a bridge rather than a router shouldn't be the active querier? 
If not then using the bridge IP address and having the querier enabled by default may be a reasonable solution (provided that our querier obeys the election rules and shuts up if it sees a query from a lower IP address that isn't 0.0.0.0). Just because a device is the elected querier for IGMP doesn't appear to mean it is required to perform any other routing functions." And introduce a new toggle for it, as suggested by Herbert. Suggested-by: Adam Baker Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Adam Baker Signed-off-by: Cong Wang Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 5 ++++- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_br.c | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 81f2389f78e..24751479310 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_IPV6) #include @@ -381,7 +382,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; - iph->saddr = 0; + iph->saddr = br->multicast_query_use_ifaddr ? 
+ inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; @@ -1618,6 +1620,7 @@ void br_multicast_init(struct net_bridge *br) br->multicast_router = 1; br->multicast_querier = 0; + br->multicast_query_use_ifaddr = 0; br->multicast_last_member_count = 2; br->multicast_startup_query_count = 2; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d2c043a857b..e260710a01d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -249,6 +249,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; + u8 multicast_query_use_ifaddr:1; u32 hash_elasticity; u32 hash_max; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8baa9c08e1a..394bb96b608 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -375,6 +375,31 @@ static ssize_t store_multicast_snooping(struct device *d, static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, show_multicast_snooping, store_multicast_snooping); +static ssize_t show_multicast_query_use_ifaddr(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); +} + +static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_use_ifaddr = !!val; + return 0; +} + +static ssize_t +store_multicast_query_use_ifaddr(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_use_ifaddr); +} +static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR, + show_multicast_query_use_ifaddr, + store_multicast_query_use_ifaddr); + static ssize_t show_multicast_querier(struct device *d, struct device_attribute *attr, char *buf) @@ -734,6 +759,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_router.attr, &dev_attr_multicast_snooping.attr, 
&dev_attr_multicast_querier.attr, + &dev_attr_multicast_query_use_ifaddr.attr, &dev_attr_hash_elasticity.attr, &dev_attr_hash_max.attr, &dev_attr_multicast_last_member_count.attr, -- cgit v1.2.3 From 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 21 May 2013 21:52:55 +0000 Subject: bridge: only expire the mdb entry when query is received Currently we arm the expire timer when the mdb entry is added, however, this causes problem when there is no querier sent out after that. So we should only arm the timer when a corresponding query is received, as suggested by Herbert. And he also mentioned "if there is no querier then group subscriptions shouldn't expire. There has to be at least one querier in the network for this thing to work. Otherwise it just degenerates into a non-snooping switch, which is OK." Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Adam Baker Signed-off-by: Cong Wang Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 39 ++++++++++++--------------------------- net/bridge/br_private.h | 1 + 2 files changed, 13 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 24751479310..40bda804fbd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -617,8 +617,6 @@ rehash: mp->br = br; mp->addr = *group; - setup_timer(&mp->timer, br_multicast_group_expired, - (unsigned long)mp); hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -656,7 +654,6 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - unsigned long now = jiffies; int err; spin_lock(&br->multicast_lock); @@ -671,7 +668,6 @@ static int br_multicast_add_group(struct net_bridge *br, if (!port) { mp->mglist = true; - mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -679,7 +675,7 @@ static int br_multicast_add_group(struct net_bridge *br, (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->port == port) - goto found; + goto out; if ((unsigned long)p->port < (unsigned long)port) break; } @@ -690,8 +686,6 @@ static int br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); -found: - mod_timer(&p->timer, now + br->multicast_membership_interval); out: err = 0; @@ -1131,6 +1125,10 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!mp) goto out; + setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); + mod_timer(&mp->timer, now + br->multicast_membership_interval); + mp->timer_armed = true; + max_delay *= br->multicast_last_member_count; if (mp->mglist && @@ -1205,6 +1203,10 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!mp) goto out; + setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); + 
mod_timer(&mp->timer, now + br->multicast_membership_interval); + mp->timer_armed = true; + max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? @@ -1263,7 +1265,7 @@ static void br_multicast_leave_group(struct net_bridge *br, call_rcu_bh(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1275,30 +1277,12 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && + if (mp->mglist && mp->timer_armed && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); } - - goto out; - } - - for (p = mlock_dereference(mp->ports, br); - p != NULL; - p = mlock_dereference(p->next, br)) { - if (p->port != port) - continue; - - if (!hlist_unhashed(&p->mglist) && - (timer_pending(&p->timer) ? 
- time_after(p->timer.expires, time) : - try_to_del_timer_sync(&p->timer) >= 0)) { - mod_timer(&p->timer, time); - } - - break; } out: @@ -1674,6 +1658,7 @@ void br_multicast_stop(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); + mp->timer_armed = false; call_rcu_bh(&mp->rcu, br_multicast_free_group); } } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e260710a01d..1b0ac95a5c3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -112,6 +112,7 @@ struct net_bridge_mdb_entry struct timer_list timer; struct br_ip addr; bool mglist; + bool timer_armed; }; struct net_bridge_mdb_htable -- cgit v1.2.3 From 6b7df111ece130fa979a0c4f58e53674c1e47d3e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 21 May 2013 21:52:56 +0000 Subject: bridge: send query as soon as leave is received Continue sending queries when leave is received if the user marks it as a querier. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Adam Baker Signed-off-by: Cong Wang Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 40bda804fbd..37a46769796 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1250,6 +1250,32 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (br->multicast_querier && + !timer_pending(&br->multicast_querier_timer)) { + __br_multicast_send_query(br, port, &mp->addr); + + time = jiffies + br->multicast_last_member_count * + br->multicast_last_member_interval; + mod_timer(port ? 
&port->multicast_query_timer : + &br->multicast_query_timer, time); + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; + } + } + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; -- cgit v1.2.3 From 1cdbcb7957cf9e5f841dbcde9b38fd18a804208b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 19 May 2013 15:46:49 +0000 Subject: net: Loosen constraints for recalculating checksum in skb_segment() This is a generic solution to resolve a specific problem that I have observed. If the encapsulation of an skb changes then the ability to offload checksums may also change. In particular it may be necessary to perform checksumming in software. An example of such a case is where a non-GRE packet is received but is to be encapsulated and transmitted as GRE. Another example relates to my proposed support for packets that are non-MPLS when received but MPLS when transmitted. The cost of this change is that the value of the csum variable may be checked when it previously was not. In the case where the csum variable is true this is pure overhead. In the case where the csum variable is false it leads to software checksumming, which I believe also leads to correct checksums in transmitted packets for the cases described above. Further analysis: This patch relies on the return value of can_checksum_protocol() being correct and in turn the return value of skb_network_protocol(), used to provide the protocol parameter of can_checksum_protocol(), being correct. It also relies on the features passed to skb_segment() and in turn to can_checksum_protocol() being correct. 
I believe that this problem has not been observed for VLANs because it appears that almost all drivers, the exception being xgbe, set vlan_features such that the checksum offload support for VLAN packets is greater than or equal to that of non-VLAN packets. I wonder if the code in xgbe may be an oversight and the hardware does support checksumming of VLAN packets. If so it may be worth updating the vlan_features of the driver as this patch will force such checksums to be performed in software rather than hardware. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index af9185d0be6..d6298914f4e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2853,7 +2853,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) - continue; + goto perform_csum_check; if (!sg) { nskb->ip_summed = CHECKSUM_NONE; @@ -2917,6 +2917,7 @@ skip_fraglist: nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; +perform_csum_check: if (!csum) { nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); -- cgit v1.2.3 From e43ac79a4bc6ca90de4ba10983b4ca39cd215b4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 May 2013 08:16:46 +0000 Subject: sch_tbf: segment too big GSO packets If a GSO packet has a length above tbf burst limit, the packet is currently silently dropped. Current way to handle this is to set the device in non GSO/TSO mode, or setting high bursts, and it's suboptimal. We can actually segment too big GSO packets, and send individual segments as tbf parameters allow, allowing for better interoperability. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Jiri Pirko Cc: Jamal Hadi Salim Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/sch_tbf.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index c8388f3c342..38008b0980d 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -116,14 +116,57 @@ struct tbf_sched_data { struct qdisc_watchdog watchdog; /* Watchdog timer */ }; + +/* GSO packet is too big, segment it so that tbf can transmit + * each segment in time + */ +static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct tbf_sched_data *q = qdisc_priv(sch); + struct sk_buff *segs, *nskb; + netdev_features_t features = netif_skb_features(skb); + int ret, nb; + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) + return qdisc_reshape_fail(skb, sch); + + nb = 0; + while (segs) { + nskb = segs->next; + segs->next = NULL; + if (likely(segs->len <= q->max_size)) { + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); + } else { + ret = qdisc_reshape_fail(skb, sch); + } + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + } else { + nb++; + } + segs = nskb; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_decrease_qlen(sch, 1 - nb); + consume_skb(skb); + return nb > 0 ? 
NET_XMIT_SUCCESS : NET_XMIT_DROP; +} + static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (qdisc_pkt_len(skb) > q->max_size) + if (qdisc_pkt_len(skb) > q->max_size) { + if (skb_is_gso(skb)) + return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); - + } ret = qdisc_enqueue(skb, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) -- cgit v1.2.3 From 7996c799ae329fab1b9c8d475fd08883f0499ed9 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 May 2013 05:41:06 +0000 Subject: ipv6: use ipv6_addr_any() helper ipv6_addr_any() is a faster way to determine if an addr is ipv6 any addr, no need to compute the addr type. Cc: Nicolas Dichtel Cc: Hideaki YOSHIFUJI Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d684d23bc02..e05269647c2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3846,7 +3846,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } - if (ipv6_addr_type(&ifa->peer_addr) != IPV6_ADDR_ANY) { + if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) goto error; @@ -4579,7 +4579,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); - if (ipv6_addr_type(&ifp->peer_addr) != IPV6_ADDR_ANY) + if (!ipv6_addr_any(&ifp->peer_addr)) addrconf_prefix_route(&ifp->peer_addr, 128, ifp->idev->dev, 0, 0); break; @@ -4587,7 +4587,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); - if 
(ipv6_addr_type(&ifp->peer_addr) != IPV6_ADDR_ANY) { + if (!ipv6_addr_any(&ifp->peer_addr)) { struct rt6_info *rt; struct net_device *dev = ifp->idev->dev; -- cgit v1.2.3 From 8892475386e819aa50856947948c546ccc964d96 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 22 May 2013 05:52:22 +0000 Subject: ipv6: use ipv6_addr_scope() helper ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK could be replaced by ipv6_addr_scope(), which is slightly faster. Cc: Hideaki YOSHIFUJI Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e05269647c2..432e084b6b6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1126,8 +1126,7 @@ retry: ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, + ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_scope(&addr), addr_flags) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); -- cgit v1.2.3 From 27e7190efd5b2f728686a8293af6d9bd34c4e562 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 May 2013 11:10:57 +0000 Subject: netfilter: xt_CT: optimize XT_CT_NOTRACK The percpu untracked ct are not currently used for XT_CT_NOTRACK. xt_ct_tg_check()/xt_ct_target() provides a single ct. That's not optimal as the ct->ct_general.use cache line will bounce among cpus. Use the intended [1] thing : xt_ct_target() should select the percpu object. 
[1] Refs : commit 5bfddbd46a95c97 ("netfilter: nf_conntrack: IPS_UNTRACKED bit") commit b3c5163fe0193a7 ("netfilter: nf_conntrack: per_cpu untracking") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index a60261cb0e8..da35ac06a97 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -26,6 +26,9 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (skb->nfct != NULL) return XT_CONTINUE; + /* special case the untracked ct : we want the percpu object */ + if (!ct) + ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); skb->nfct = &ct->ct_general; skb->nfctinfo = IP_CT_NEW; @@ -186,8 +189,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, int ret = -EOPNOTSUPP; if (info->flags & XT_CT_NOTRACK) { - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); + ct = NULL; goto out; } @@ -311,7 +313,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (!nf_ct_is_untracked(ct)) { + if (ct && !nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) module_put(help->helper->me); @@ -319,8 +321,8 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, nf_ct_l3proto_module_put(par->family); xt_ct_destroy_timeout(ct); + nf_ct_put(info->ct); } - nf_ct_put(info->ct); } static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) -- cgit v1.2.3 From 00028aa37098168048728acc32ab0206687f2920 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 May 2013 11:01:06 +0000 Subject: netfilter: xt_socket: use IP early demux With IP early demux added in linux-3.6, we perform TCP lookup in IP layer before iptables hooks. We can avoid doing a second lookup in xt_socket. Signed-off-by: Eric Dumazet Acked-by: David S. 
Miller Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 63b2bdb59e9..02704245710 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -107,7 +107,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; - struct sock *sk; + struct sock *sk = skb->sk; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -155,9 +155,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } #endif - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, - saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); - if (sk != NULL) { + if (!sk) + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, + saddr, daddr, sport, dport, + par->in, NFT_LOOKUP_ANY); + if (sk) { bool wildcard; bool transparent = true; @@ -173,7 +175,8 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - xt_socket_put_sk(sk); + if (sk != skb->sk) + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; @@ -260,7 +263,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; - struct sock *sk; + struct sock *sk = skb->sk; struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); int thoff = 0, uninitialized_var(tproto); @@ -291,9 +294,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) return false; } - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, - saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); - if (sk != NULL) { + if (!sk) + sk = 
nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + saddr, daddr, sport, dport, + par->in, NFT_LOOKUP_ANY); + if (sk) { bool wildcard; bool transparent = true; @@ -309,7 +314,8 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - xt_socket_put_sk(sk); + if (sk != skb->sk) + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; -- cgit v1.2.3 From 8bc14d25ffb9dfc242d3a877bb4fe683adb27692 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 16 May 2013 22:07:22 +0000 Subject: bridge: netfilter: using strlcpy() instead of strncpy() 'name' is already set to all zeroes when it is defined, so there is no need to let strncpy() pad it again. 'name' is a string and should always be NUL terminated, so use strlcpy() instead of strncpy(). Signed-off-by: Chen Gang Acked-by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3d110c4fc78..ac780242838 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1339,7 +1339,7 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, /* ebtables expects 32 bytes long names but xt_match names are 29 bytes long. Copy 29 bytes and fill remaining bytes with zeroes. 
*/ - strncpy(name, m->u.match->name, sizeof(name)); + strlcpy(name, m->u.match->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; @@ -1351,7 +1351,7 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, char __user *hlp = ubase + ((char *)w - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; - strncpy(name, w->u.watcher->name, sizeof(name)); + strlcpy(name, w->u.watcher->name, sizeof(name)); if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; @@ -1377,7 +1377,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); if (ret != 0) return ret; - strncpy(name, t->u.target->name, sizeof(name)); + strlcpy(name, t->u.target->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; -- cgit v1.2.3 From 6d11cfdba52af08b889fd6d3ee4212930493eb38 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 May 2013 22:42:36 +0000 Subject: netfilter: don't panic on error while walking through the init path Don't panic if we hit an error while adding the nf_log or pernet netfilter support, just bail out. 
Signed-off-by: Pablo Neira Ayuso Acked-by: Gao feng --- net/netfilter/core.c | 21 +++++++++++++++------ net/netfilter/nf_log.c | 5 +---- net/socket.c | 4 +++- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 07c865a31a3..300539db7bb 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -302,17 +302,26 @@ static struct pernet_operations netfilter_net_ops = { .exit = netfilter_net_exit, }; -void __init netfilter_init(void) +int __init netfilter_init(void) { - int i, h; + int i, h, ret; + for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } - if (register_pernet_subsys(&netfilter_net_ops) < 0) - panic("cannot create netfilter proc entry"); + ret = register_pernet_subsys(&netfilter_net_ops); + if (ret < 0) + goto err; + + ret = netfilter_log_init(); + if (ret < 0) + goto err_pernet; - if (netfilter_log_init() < 0) - panic("cannot initialize nf_log"); + return 0; +err_pernet: + unregister_pernet_subsys(&netfilter_net_ops); +err: + return ret; } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 388656d5a9e..bd5474adcab 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -368,10 +368,7 @@ static int __net_init nf_log_net_init(struct net *net) return 0; out_sysctl: - /* For init_net: errors will trigger panic, don't unroll on error. 
*/ - if (!net_eq(net, &init_net)) - remove_proc_entry("nf_log", net->nf.proc_netfilter); - + remove_proc_entry("nf_log", net->nf.proc_netfilter); return ret; } diff --git a/net/socket.c b/net/socket.c index 6b94633ca61..734194d3624 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2612,7 +2612,9 @@ static int __init sock_init(void) */ #ifdef CONFIG_NETFILTER - netfilter_init(); + err = netfilter_init(); + if (err) + goto out; #endif #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING -- cgit v1.2.3 From de94c4591bd606729af1b913d6e98c6c449e42df Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 May 2013 22:42:37 +0000 Subject: netfilter: {ipt,ebt}_ULOG: rise warning on deprecation This target has been superseded by NFLOG. Spot a warning so we prepare removal in a couple of years. Signed-off-by: Pablo Neira Ayuso Acked-by: Gao feng --- net/bridge/netfilter/ebt_ulog.c | 6 ++++++ net/ipv4/netfilter/Kconfig | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index fc1905c5141..2ec6c19ff90 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -267,6 +267,12 @@ static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) { struct ebt_ulog_info *uloginfo = par->targinfo; + if (!par->net->xt.ebt_ulog_warn_deprecated) { + pr_info("ebt_ulog is deprecated and it will be removed soon, " + "use ebt_nflog instead\n"); + par->net->xt.ebt_ulog_warn_deprecated = true; + } + if (uloginfo->nlgroup > 31) return -EINVAL; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e7916c19393..4e902801742 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -111,7 +111,7 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. 
config IP_NF_TARGET_ULOG - tristate "ULOG target support" + tristate "ULOG target support (obsolete)" default m if NETFILTER_ADVANCED=n ---help--- diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index f8a222cb644..c1953d07e2f 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -325,6 +325,12 @@ static int ulog_tg_check(const struct xt_tgchk_param *par) { const struct ipt_ulog_info *loginfo = par->targinfo; + if (!par->net->xt.ulog_warn_deprecated) { + pr_info("ULOG is deprecated and it will be removed soon, " + "use NFLOG instead\n"); + par->net->xt.ulog_warn_deprecated = true; + } + if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("prefix not null-terminated\n"); return -EINVAL; -- cgit v1.2.3 From a38e5e230e3f4e7bc9195d3e7a81567c888257ca Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 22 May 2013 14:50:32 +0900 Subject: ipvs: use cond_resched_rcu() helper when walking connections This avoids the situation where walking of a large number of connections may prevent scheduling for a long time while also avoiding excessive calls to rcu_read_unlock() and rcu_read_lock(). Note that in the case of !CONFIG_PREEMPT_RCU this will add a call to cond_resched(). 
Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Acked-by: Peter Zijlstra Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a083bda322b..c8c52a98590 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -975,8 +975,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) return cp; } } - rcu_read_unlock(); - rcu_read_lock(); + cond_resched_rcu(); } return NULL; @@ -1015,8 +1014,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter->l = &ip_vs_conn_tab[idx]; return cp; } - rcu_read_unlock(); - rcu_read_lock(); + cond_resched_rcu(); } iter->l = NULL; return NULL; @@ -1206,17 +1204,13 @@ void ip_vs_random_dropentry(struct net *net) int idx; struct ip_vs_conn *cp, *cp_c; + rcu_read_lock(); /* * Randomly scan 1/32 of the whole table every second */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned int hash = net_random() & ip_vs_conn_tab_mask; - /* - * Lock is actually needed in this loop. - */ - rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ @@ -1252,8 +1246,9 @@ void ip_vs_random_dropentry(struct net *net) __ip_vs_conn_put(cp); } } - rcu_read_unlock(); + cond_resched_rcu(); } + rcu_read_unlock(); } @@ -1267,11 +1262,8 @@ static void ip_vs_conn_flush(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); flush_again: + rcu_read_lock(); for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - /* - * Lock is actually needed in this loop. 
- */ - rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) @@ -1286,8 +1278,9 @@ flush_again: __ip_vs_conn_put(cp); } } - rcu_read_unlock(); + cond_resched_rcu(); } + rcu_read_unlock(); /* the counter may be not NULL, because maybe some conn entries are run by slow timer handler or unhashed but still referred */ -- cgit v1.2.3 From 5f38a11274f0e74ec0e499bc779d355510b39790 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2013 23:09:56 +0200 Subject: mac80211: assign AP_VLAN hw queues correctly A lot of code in mac80211 assumes that the hw queues are set up correctly for all interfaces (except for monitor) but this isn't true for AP_VLAN interfaces. Fix this by copying the AP master configuration when an AP VLAN is brought up, after this the AP interface can't change its configuration any more and needs to be brought down to change it, which also forces AP_VLAN interfaces down, so just copying in open() is sufficient. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 68f51c3af49..00e2238355f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -474,6 +474,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) master->control_port_protocol; sdata->control_port_no_encrypt = master->control_port_no_encrypt; + sdata->vif.cab_queue = master->vif.cab_queue; + memcpy(sdata->vif.hw_queue, master->vif.hw_queue, + sizeof(sdata->vif.hw_queue)); break; } case NL80211_IFTYPE_AP: -- cgit v1.2.3 From 4c8a9d4bfaf7dbc7d2168494904d79d22cc01db7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 May 2013 01:06:09 +0200 Subject: mac80211: close AP_VLAN interfaces before unregistering all Since Eric's commit efe117ab8 ("Speedup ieee80211_remove_interfaces") there's a bug in mac80211 when it unregisters with AP_VLAN interfaces up. 
If the AP_VLAN interface was registered after the AP it belongs to (which is the typical case) and then we get into this code path, unregister_netdevice_many() will crash because it isn't prepared to deal with interfaces being closed in the middle of it. Exactly this happens though, because we iterate the list, find the AP master this AP_VLAN belongs to and dev_close() the dependent VLANs. After this, unregister_netdevice_many() won't pick up the fact that the AP_VLAN is already down and will do it again, causing a crash. Cc: stable@vger.kernel.org [2.6.33+] Cc: Eric Dumazet Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 00e2238355f..ceef64426a8 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1703,6 +1703,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); + /* + * Close all AP_VLAN interfaces first, as otherwise they + * might be closed while the AP interface they belong to + * is closed, causing unregister_netdevice_many() to crash. + */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + dev_close(sdata->dev); + mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); -- cgit v1.2.3 From 161f65ba3583b84b4714f21dbee263f99824c516 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 22 May 2013 07:49:34 +0000 Subject: bridge: Set vlan_features to allow offloads on vlans. When vlan device is configured on top of the bridge, it does not support any offload capabilities because the bridge device does not initialize vlan_features. Set vlan_features to be equivalent to hw_features. Signed-off-by: Vlad Yasevich Signed-off-by: David S.
Miller --- net/bridge/br_device.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 967312803e4..75f3239130f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -22,6 +22,9 @@ #include #include "br_private.h" +#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -346,12 +349,10 @@ void br_dev_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX; - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX; + dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | + NETIF_F_HW_VLAN_CTAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; + dev->vlan_features = COMMON_FEATURES; br->dev = dev; spin_lock_init(&br->lock); -- cgit v1.2.3 From 786677d100600b7f6089bae0d3967c1b901a6141 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 24 May 2013 12:05:45 +0200 Subject: mac80211: add STBC flag for radiotap Some chips can tell us if received frame was encoded with STBC or not. To make this information available in user space we can use updated radiotap specification: http://www.radiotap.org/defined-fields/MCS This patch will set number of STBC encoded spatial streams (Nss). The HAVE_STBC flag should be provided by driver. 
Signed-off-by: Oleksij Rempel Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2c8c5236c..7507f7cdd68 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -258,6 +258,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; if (status->flag & RX_FLAG_HT) { + unsigned int stbc; + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); *pos++ = local->hw.radiotap_mcs_details; *pos = 0; @@ -267,6 +269,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos |= IEEE80211_RADIOTAP_MCS_BW_40; if (status->flag & RX_FLAG_HT_GF) *pos |= IEEE80211_RADIOTAP_MCS_FMT_GF; + stbc = (status->flag & RX_FLAG_STBC_MASK) >> RX_FLAG_STBC_SHIFT; + *pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT; pos++; *pos++ = status->rate_idx; } -- cgit v1.2.3 From 5e4b6f5698421d94226cc2f80eae6d613c9acef8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 16 May 2013 20:11:08 +0300 Subject: cfg80211: Allow TDLS peer AID to be configured for VHT VHT uses peer AID in the PARTIAL_AID field in TDLS frames. The current design for TDLS is to first add a dummy STA entry before completing TDLS Setup and then update information on this STA entry based on what was received from the peer during the setup exchange. In theory, this could use NL80211_ATTR_STA_AID to set the peer AID just like this is used in AP mode to set the AID of an association station. However, existing cfg80211 validation rules prevent this attribute from being used with set_station operation. To avoid interoperability issues between different kernel and user space version combinations, introduce a new nl80211 attribute for the purpose of setting TDLS peer AID. This attribute can be used in both the new_station and set_station operations. 
It is not supposed to be allowed to change the AID value during the lifetime of the STA entry, but that validation is left for drivers to do in the change_station callback. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5f10f7acfa0..14276af7964 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -378,6 +378,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -3872,6 +3873,8 @@ static int nl80211_set_station_tdls(struct genl_info *info, struct station_parameters *params) { /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_PEER_AID]) + params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params->ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); @@ -4012,7 +4015,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) + if (!info->attrs[NL80211_ATTR_STA_AID] && + !info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -4023,7 +4027,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (info->attrs[NL80211_ATTR_STA_AID]) + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + else + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if 
(!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; -- cgit v1.2.3 From d4a5a48976d12ab340ed34605b5f5049b123d868 Mon Sep 17 00:00:00 2001 From: Ashok Nagarajan Date: Mon, 13 May 2013 17:08:04 -0700 Subject: mac80211: Move mesh estab_plinks outside mesh_stats debug group As estab_plinks is not a statistics member, don't show its debug information along with other mesh stat members Signed-off-by: Ashok Nagarajan Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 14abcf44f97..f83074fe667 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -471,6 +471,8 @@ __IEEE80211_IF_FILE_W(tsf); IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); #ifdef CONFIG_MAC80211_MESH +IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); + /* Mesh stats attributes */ IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC); IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); @@ -480,7 +482,6 @@ IEEE80211_IF_FILE(dropped_frames_congestion, u.mesh.mshstats.dropped_frames_congestion, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, u.mesh.mshstats.dropped_frames_no_route, DEC); -IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); /* Mesh parameters */ IEEE80211_IF_FILE(dot11MeshMaxRetries, @@ -583,6 +584,7 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_mesh_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD_MODE(tsf, 0600); + DEBUGFS_ADD_MODE(estab_plinks, 0400); } static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) @@ -598,7 +600,6 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) MESHSTATS_ADD(dropped_frames_ttl); MESHSTATS_ADD(dropped_frames_no_route); MESHSTATS_ADD(dropped_frames_congestion); - MESHSTATS_ADD(estab_plinks); #undef MESHSTATS_ADD } -- cgit v1.2.3 From 
b422c6cd7e93bb613030f14d7d8a0cc73f115629 Mon Sep 17 00:00:00 2001 From: Ashok Nagarajan Date: Fri, 10 May 2013 17:50:51 -0700 Subject: {cfg,mac}80211: move mandatory rates calculation to cfg80211 Move mandatory rates calculation to cfg80211, shared with non mac80211 drivers. Signed-off-by: Ashok Nagarajan [extend documentation] Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 10 +++++++--- net/mac80211/ieee80211_i.h | 3 --- net/mac80211/mesh.c | 5 +++-- net/mac80211/util.c | 26 -------------------------- net/wireless/util.c | 23 +++++++++++++++++++++++ 5 files changed, 33 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 170f9a7fa31..956ba6316da 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -341,6 +341,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; int band; /* @@ -380,8 +381,9 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, sta->last_rx = jiffies; /* make sure mandatory rates are always added */ + sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); return ieee80211_ibss_finish_sta(sta, auth); } @@ -492,7 +494,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, prev_rates = sta->sta.supp_rates[band]; /* make sure mandatory rates are always added */ sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); if (sta->sta.supp_rates[band] != prev_rates) { ibss_dbg(sdata, @@ -624,6 +626,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; int 
band; /* @@ -658,8 +661,9 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, sta->last_rx = jiffies; /* make sure mandatory rates are always added */ + sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); spin_lock(&ifibss->incomplete_lock); list_add(&sta->list, &ifibss->incomplete_stations); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 158e6eb188d..b7cbd4ebf0e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1505,9 +1505,6 @@ static inline void ieee802_11_parse_elems(u8 *start, size_t len, bool action, ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); } -u32 ieee80211_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band); - void ieee80211_dynamic_ps_enable_work(struct work_struct *work); void ieee80211_dynamic_ps_disable_work(struct work_struct *work); void ieee80211_dynamic_ps_timer(unsigned long data); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c13db9ad394..c14bb816c6a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -741,6 +741,8 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband = + sdata->local->hw.wiphy->bands[band]; local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -758,8 +760,7 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; sdata->vif.bss_conf.enable_beacon = true; - sdata->vif.bss_conf.basic_rates = - ieee80211_mandatory_rates(local, band); + sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sband); changed |= ieee80211_mps_local_status_update(sdata); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 
3f87fa468b1..707953fd832 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1072,32 +1072,6 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata, true); } -u32 ieee80211_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *bitrates; - u32 mandatory_rates; - enum ieee80211_rate_flags mandatory_flag; - int i; - - sband = local->hw.wiphy->bands[band]; - if (WARN_ON(!sband)) - return 1; - - if (band == IEEE80211_BAND_2GHZ) - mandatory_flag = IEEE80211_RATE_MANDATORY_B; - else - mandatory_flag = IEEE80211_RATE_MANDATORY_A; - - bitrates = sband->bitrates; - mandatory_rates = 0; - for (i = 0; i < sband->n_bitrates; i++) - if (bitrates[i].flags & mandatory_flag) - mandatory_rates |= BIT(i); - return mandatory_rates; -} - void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *da, diff --git a/net/wireless/util.c b/net/wireless/util.c index b11052be09b..0962f107f57 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -33,6 +33,29 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_get_response_rate); +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband) +{ + struct ieee80211_rate *bitrates; + u32 mandatory_rates = 0; + enum ieee80211_rate_flags mandatory_flag; + int i; + + if (WARN_ON(!sband)) + return 1; + + if (sband->band == IEEE80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else + mandatory_flag = IEEE80211_RATE_MANDATORY_A; + + bitrates = sband->bitrates; + for (i = 0; i < sband->n_bitrates; i++) + if (bitrates[i].flags & mandatory_flag) + mandatory_rates |= BIT(i); + return mandatory_rates; +} +EXPORT_SYMBOL(ieee80211_mandatory_rates); + int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { /* see 802.11 17.3.8.3.2 
and Annex J -- cgit v1.2.3 From 9f419f3851041e0c8170629f0639813dbfc79d5e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 May 2013 21:34:22 +0200 Subject: cfg80211: move cfg80211_get_dev_from_ifindex under wext The function is only used and needed by the wext code for scanning, so move it there. Signed-off-by: Johannes Berg --- net/wireless/core.c | 21 --------------------- net/wireless/core.h | 4 ---- net/wireless/scan.c | 21 +++++++++++++++++++++ 3 files changed, 21 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 58e69d69160..cc49cf11c7a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -90,27 +90,6 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) return &rdev->wiphy; } -struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) -{ - struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); - struct net_device *dev; - - mutex_lock(&cfg80211_mutex); - dev = dev_get_by_index(net, ifindex); - if (!dev) - goto out; - if (dev->ieee80211_ptr) { - rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else - rdev = ERR_PTR(-ENODEV); - dev_put(dev); - out: - mutex_unlock(&cfg80211_mutex); - return rdev; -} - /* requires cfg80211_mutex to be held */ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) diff --git a/net/wireless/core.h b/net/wireless/core.h index fd35dae547c..95b29075a9c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -164,10 +164,6 @@ int get_wiphy_idx(struct wiphy *wiphy); /* requires cfg80211_rdev_mutex to be held! 
*/ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); -/* identical to cfg80211_get_dev_from_info but only operate on ifindex */ -extern struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(struct net *net, int ifindex); - int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fd99ea495b7..2ce44a712f1 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1040,6 +1040,27 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) EXPORT_SYMBOL(cfg80211_unlink_bss); #ifdef CONFIG_CFG80211_WEXT +static struct cfg80211_registered_device * +cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) +{ + struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); + struct net_device *dev; + + mutex_lock(&cfg80211_mutex); + dev = dev_get_by_index(net, ifindex); + if (!dev) + goto out; + if (dev->ieee80211_ptr) { + rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + mutex_lock(&rdev->mtx); + } else + rdev = ERR_PTR(-ENODEV); + dev_put(dev); + out: + mutex_unlock(&cfg80211_mutex); + return rdev; +} + int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) -- cgit v1.2.3 From 73810b77def898b43a97638478692922b7f820eb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 May 2013 21:49:02 +0200 Subject: cfg80211: use atomic_t for wiphy counter There's no need to lock, we can just use an atomic_t. 
Signed-off-by: Johannes Berg --- net/wireless/core.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index cc49cf11c7a..9416b8f55f5 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -289,7 +289,7 @@ static void cfg80211_event_work(struct work_struct *work) struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) { - static int wiphy_counter; + static atomic_t wiphy_counter = ATOMIC_INIT(0); struct cfg80211_registered_device *rdev; int alloc_size; @@ -311,20 +311,15 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->ops = ops; - mutex_lock(&cfg80211_mutex); - - rdev->wiphy_idx = wiphy_counter++; + rdev->wiphy_idx = atomic_inc_return(&wiphy_counter); if (unlikely(rdev->wiphy_idx < 0)) { - wiphy_counter--; - mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! */ + atomic_dec(&wiphy_counter); kfree(rdev); return NULL; } - mutex_unlock(&cfg80211_mutex); - /* give it a proper name */ dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); -- cgit v1.2.3 From 5fe231e873729fa2f57cdc417d5c1f80871e2d7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 May 2013 21:45:15 +0200 Subject: cfg80211: vastly simplify locking Virtually all code paths in cfg80211 already (need to) hold the RTNL. As such, there's little point in having another four mutexes for various parts of the code, they just cause lock ordering issues (and much of the time, the RTNL and a few of the others need thus be held.) Simplify all this by getting rid of the extra four mutexes and just use the RTNL throughout. Only a few code changes were needed to do this and we can get rid of a work struct for bonus points. 
Signed-off-by: Johannes Berg --- net/wireless/core.c | 166 ++++++++---------------------------------- net/wireless/core.h | 32 +------- net/wireless/debugfs.c | 4 +- net/wireless/ibss.c | 10 +-- net/wireless/mesh.c | 2 - net/wireless/mlme.c | 12 +-- net/wireless/nl80211.c | 178 +++++++++------------------------------------ net/wireless/reg.c | 36 +++------ net/wireless/scan.c | 42 +++++------ net/wireless/sme.c | 26 +------ net/wireless/util.c | 11 --- net/wireless/wext-compat.c | 22 +----- net/wireless/wext-sme.c | 18 ----- 13 files changed, 115 insertions(+), 444 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 9416b8f55f5..5fc642d4071 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -36,12 +36,10 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME); -/* RCU-protected (and cfg80211_mutex for writers) */ +/* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg80211_rdev_list); int cfg80211_rdev_list_generation; -DEFINE_MUTEX(cfg80211_mutex); - /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; @@ -53,12 +51,11 @@ module_param(cfg80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(cfg80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); -/* requires cfg80211_mutex to be held! */ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (rdev->wiphy_idx == wiphy_idx) { @@ -77,12 +74,11 @@ int get_wiphy_idx(struct wiphy *wiphy) return rdev->wiphy_idx; } -/* requires cfg80211_rdev_mutex to be held! 
*/ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); if (!rdev) @@ -90,14 +86,13 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) return &rdev->wiphy; } -/* requires cfg80211_mutex to be held */ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { struct cfg80211_registered_device *rdev2; int wiphy_idx, taken = -1, result, digits; - assert_cfg80211_lock(); + ASSERT_RTNL(); /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); @@ -195,8 +190,7 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { - lockdep_assert_held(&rdev->devlist_mtx); - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; @@ -235,8 +229,6 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) rtnl_lock(); - /* read-only iteration need not hold the devlist_mtx */ - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { dev_close(wdev->netdev); @@ -245,12 +237,7 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - /* but this requires it */ - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); cfg80211_stop_p2p_device(rdev, wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); break; default: break; @@ -278,10 +265,7 @@ static void cfg80211_event_work(struct work_struct *work) event_work); rtnl_lock(); - cfg80211_lock_rdev(rdev); - cfg80211_process_rdev_events(rdev); - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -323,9 +307,6 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) /* give it a 
proper name */ dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); - mutex_init(&rdev->mtx); - mutex_init(&rdev->devlist_mtx); - mutex_init(&rdev->sched_scan_mtx); INIT_LIST_HEAD(&rdev->wdev_list); INIT_LIST_HEAD(&rdev->beacon_registrations); spin_lock_init(&rdev->beacon_registrations_lock); @@ -573,11 +554,11 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - mutex_lock(&cfg80211_mutex); + rtnl_lock(); res = device_add(&rdev->wiphy.dev); if (res) { - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); return res; } @@ -606,25 +587,18 @@ int wiphy_register(struct wiphy *wiphy) } cfg80211_debugfs_rdev_add(rdev); - mutex_unlock(&cfg80211_mutex); - /* - * due to a locking dependency this has to be outside of the - * cfg80211_mutex lock - */ res = rfkill_register(rdev->rfkill); if (res) { device_del(&rdev->wiphy.dev); - mutex_lock(&cfg80211_mutex); debugfs_remove_recursive(rdev->wiphy.debugfsdir); list_del_rcu(&rdev->list); wiphy_regulatory_deregister(wiphy); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); return res; } - rtnl_lock(); rdev->wiphy.registered = true; rtnl_unlock(); return 0; @@ -654,25 +628,19 @@ void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - rtnl_lock(); - rdev->wiphy.registered = false; - rtnl_unlock(); - - rfkill_unregister(rdev->rfkill); - - /* protect the device list */ - mutex_lock(&cfg80211_mutex); - wait_event(rdev->dev_wait, ({ int __count; - mutex_lock(&rdev->devlist_mtx); + rtnl_lock(); __count = rdev->opencount; - mutex_unlock(&rdev->devlist_mtx); + rtnl_unlock(); __count == 0; })); - mutex_lock(&rdev->devlist_mtx); + rtnl_lock(); + rdev->wiphy.registered = false; + + rfkill_unregister(rdev->rfkill); + BUG_ON(!list_empty(&rdev->wdev_list)); - mutex_unlock(&rdev->devlist_mtx); /* * First remove the hardware from everywhere, this makes @@ -682,20 +650,6 @@ void wiphy_unregister(struct wiphy *wiphy) 
list_del_rcu(&rdev->list); synchronize_rcu(); - /* - * Try to grab rdev->mtx. If a command is still in progress, - * hopefully the driver will refuse it since it's tearing - * down the device already. We wait for this command to complete - * before unlinking the item from the list. - * Note: as codified by the BUG_ON above we cannot get here if - * a virtual interface is still present. Hence, we can only get - * to lock contention here if userspace issues a command that - * identified the hardware by wiphy index. - */ - cfg80211_lock_rdev(rdev); - /* nothing */ - cfg80211_unlock_rdev(rdev); - /* * If this device got a regulatory hint tell core its * free to listen now to a new shiny device regulatory hint @@ -705,7 +659,7 @@ void wiphy_unregister(struct wiphy *wiphy) cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); @@ -723,9 +677,6 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; rfkill_destroy(rdev->rfkill); - mutex_destroy(&rdev->mtx); - mutex_destroy(&rdev->devlist_mtx); - mutex_destroy(&rdev->sched_scan_mtx); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(&reg->list); kfree(reg); @@ -750,36 +701,6 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); -static void wdev_cleanup_work(struct work_struct *work) -{ - struct wireless_dev *wdev; - struct cfg80211_registered_device *rdev; - - wdev = container_of(work, struct wireless_dev, cleanup_work); - rdev = wiphy_to_dev(wdev->wiphy); - - mutex_lock(&rdev->sched_scan_mtx); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } - - if (WARN_ON(rdev->sched_scan_req && - rdev->sched_scan_req->dev ==
wdev->netdev)) { - __cfg80211_stop_sched_scan(rdev, false); - } - - mutex_unlock(&rdev->sched_scan_mtx); - - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - wake_up(&rdev->dev_wait); - - dev_put(wdev->netdev); -} - void cfg80211_unregister_wdev(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -789,8 +710,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) if (WARN_ON(wdev->netdev)) return; - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -802,8 +721,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) WARN_ON_ONCE(1); break; } - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -822,7 +739,7 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, } void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; @@ -832,9 +749,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - mutex_lock(&rdev->sched_scan_mtx); __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT @@ -887,13 +802,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * are added with nl80211. 
*/ mutex_init(&wdev->mtx); - INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - mutex_lock(&rdev->devlist_mtx); wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; @@ -906,7 +819,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } wdev->netdev = dev; wdev->sme_state = CFG80211_SME_IDLE; - mutex_unlock(&rdev->devlist_mtx); #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; @@ -932,26 +844,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); - dev_hold(dev); - queue_work(cfg80211_wq, &wdev->cleanup_work); + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, true); + } + + if (WARN_ON(rdev->sched_scan_req && + rdev->sched_scan_req->dev == wdev->netdev)) { + __cfg80211_stop_sched_scan(rdev, false); + } + + rdev->opencount--; + wake_up(&rdev->dev_wait); break; case NETDEV_UP: - /* - * If we have a really quick DOWN/UP succession we may - * have this work still pending ... cancel it and see - * if it was pending, in which case we need to account - * for some of the work it would have done. 
- */ - if (cancel_work_sync(&wdev->cleanup_work)) { - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - dev_put(dev); - } cfg80211_update_iface_num(rdev, wdev->iftype, 1); - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -983,10 +891,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); /* * Configure power management to the driver here so that its @@ -1002,12 +907,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } break; case NETDEV_UNREGISTER: - /* - * NB: cannot take rdev->mtx here because this may be - * called within code protected by it when interfaces - * are removed with nl80211. - */ - mutex_lock(&rdev->devlist_mtx); /* * It is possible to get NETDEV_UNREGISTER * multiple times. 
To detect that, check @@ -1024,7 +923,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, kfree(wdev->wext.keys); #endif } - mutex_unlock(&rdev->devlist_mtx); /* * synchronise (so that we won't find this netdev * from other code any more) and then clear the list @@ -1044,9 +942,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->rfkill)) return notifier_from_errno(-ERFKILL); - mutex_lock(&rdev->devlist_mtx); ret = cfg80211_can_add_interface(rdev, wdev->iftype); - mutex_unlock(&rdev->devlist_mtx); if (ret) return notifier_from_errno(ret); break; @@ -1064,12 +960,10 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) struct cfg80211_registered_device *rdev; rtnl_lock(); - mutex_lock(&cfg80211_mutex); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (net_eq(wiphy_net(&rdev->wiphy), net)) WARN_ON(cfg80211_switch_netns(rdev, &init_net)); } - mutex_unlock(&cfg80211_mutex); rtnl_unlock(); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 95b29075a9c..d21a0fc0140 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -5,7 +5,6 @@ */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H -#include #include #include #include @@ -23,11 +22,6 @@ struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; - /* we hold this mutex during any call so that - * we cannot do multiple calls at once, and also - * to avoid the deregister call to proceed while - * any call is in progress */ - struct mutex mtx; /* rfkill support */ struct rfkill_ops rfkill_ops; @@ -49,9 +43,7 @@ struct cfg80211_registered_device { /* wiphy index, internal only */ int wiphy_idx; - /* associated wireless interfaces */ - struct mutex devlist_mtx; - /* protected by devlist_mtx or RCU */ + /* associated wireless interfaces, protected by rtnl or RCU */ struct list_head wdev_list; int devlist_generation, wdev_id; int opencount; /* 
also protected by devlist_mtx */ @@ -75,8 +67,6 @@ struct cfg80211_registered_device { struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; - struct mutex sched_scan_mtx; - #ifdef CONFIG_NL80211_TESTMODE struct genl_info *testmode_info; #endif @@ -120,15 +110,9 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) } extern struct workqueue_struct *cfg80211_wq; -extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; -static inline void assert_cfg80211_lock(void) -{ - lockdep_assert_held(&cfg80211_mutex); -} - struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; @@ -161,23 +145,11 @@ static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss) struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); -/* requires cfg80211_rdev_mutex to be held! */ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); -static inline void cfg80211_lock_rdev(struct cfg80211_registered_device *rdev) -{ - mutex_lock(&rdev->mtx); -} - -static inline void cfg80211_unlock_rdev(struct cfg80211_registered_device *rdev) -{ - BUG_ON(IS_ERR(rdev) || !rdev); - mutex_unlock(&rdev->mtx); -} - static inline void wdev_lock(struct wireless_dev *wdev) __acquires(wdev) { @@ -192,7 +164,7 @@ static inline void wdev_unlock(struct wireless_dev *wdev) mutex_unlock(&wdev->mtx); } -#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx) +#define ASSERT_RDEV_LOCK(rdev) ASSERT_RTNL() #define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 920cabe0461..90d05003662 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -74,7 +74,7 @@ static ssize_t 
ht40allow_map_read(struct file *file, if (!buf) return -ENOMEM; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { sband = wiphy->bands[band]; @@ -85,7 +85,7 @@ static ssize_t ht40allow_map_read(struct file *file, buf, buf_size, offset); } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); r = simple_read_from_buffer(user_buf, count, ppos, buf, offset); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index d80e47194d4..5449c5a6de8 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -152,11 +152,11 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); + ASSERT_RTNL(); + wdev_lock(wdev); err = __cfg80211_join_ibss(rdev, dev, params, connkeys); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -359,11 +359,9 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev->wext.ibss.channel_fixed = false; } - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -429,11 +427,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -512,11 +508,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } else wdev->wext.ibss.bssid = NULL; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 9546ad21055..5dfb289ab76 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -186,11 +186,9 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct 
wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = __cfg80211_join_mesh(rdev, dev, setup, conf); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c21e32f9549..68b40f21bc3 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -313,14 +313,14 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, { int err; - mutex_lock(&rdev->devlist_mtx); + ASSERT_RTNL(); + wdev_lock(dev->ieee80211_ptr); err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, ssid, ssid_len, ie, ie_len, key, key_len, key_idx, sae_data, sae_data_len); wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -424,12 +424,12 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); + ASSERT_RTNL(); + wdev_lock(wdev); err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, ssid_len, req); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -844,7 +844,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) dfs_update_channels_wk); wiphy = &rdev->wiphy; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { sband = wiphy->bands[bandid]; if (!sband) @@ -877,7 +877,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) check_again = true; } } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); /* reschedule if there are other channels waiting to be cleared again */ if (check_again) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5bcf3a5b646..74cdb1a0cf3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -59,7 +59,7 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) int wiphy_idx = -1; int ifidx = -1; - assert_cfg80211_lock(); + ASSERT_RTNL(); if (!have_ifidx && 
!have_wdev_id) return ERR_PTR(-EINVAL); @@ -80,7 +80,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) continue; - mutex_lock(&rdev->devlist_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { @@ -92,7 +91,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) break; } } - mutex_unlock(&rdev->devlist_mtx); if (result) break; @@ -109,7 +107,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) struct cfg80211_registered_device *rdev = NULL, *tmp; struct net_device *netdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); if (!attrs[NL80211_ATTR_WIPHY] && !attrs[NL80211_ATTR_IFINDEX] && @@ -128,14 +126,12 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); if (tmp) { /* make sure wdev exists */ - mutex_lock(&tmp->devlist_mtx); list_for_each_entry(wdev, &tmp->wdev_list, list) { if (wdev->identifier != (u32)wdev_id) continue; found = true; break; } - mutex_unlock(&tmp->devlist_mtx); if (!found) tmp = NULL; @@ -182,19 +178,6 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. - * If successful, it returns non-NULL and also locks - * the driver's mutex! - * - * This means that you need to call cfg80211_unlock_rdev() - * before being allowed to acquire &cfg80211_mutex! - * - * This is necessary because we need to lock the global - * mutex to get an item off the list safely, and then - * we lock the rdev mutex so it doesn't go away under us. - * - * We don't want to keep cfg80211_mutex locked - * for all the time in order to allow requests on - * other interfaces to go through at the same time. * * The result of this can be a PTR_ERR and hence must * be checked with IS_ERR() for errors. 
@@ -202,20 +185,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) static struct cfg80211_registered_device * cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) { - struct cfg80211_registered_device *rdev; - - mutex_lock(&cfg80211_mutex); - rdev = __cfg80211_rdev_from_attrs(netns, info->attrs); - - /* if it is not an error we grab the lock on - * it to assure it won't be going away while - * we operate on it */ - if (!IS_ERR(rdev)) - mutex_lock(&rdev->mtx); - - mutex_unlock(&cfg80211_mutex); - - return rdev; + return __cfg80211_rdev_from_attrs(netns, info->attrs); } /* policy for the attributes */ @@ -456,7 +426,6 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, int err; rtnl_lock(); - mutex_lock(&cfg80211_mutex); if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, @@ -485,14 +454,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, *rdev = wiphy_to_dev(wiphy); *wdev = NULL; - mutex_lock(&(*rdev)->devlist_mtx); list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { if (tmp->identifier == cb->args[1]) { *wdev = tmp; break; } } - mutex_unlock(&(*rdev)->devlist_mtx); if (!*wdev) { err = -ENODEV; @@ -500,19 +467,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - cfg80211_lock_rdev(*rdev); - - mutex_unlock(&cfg80211_mutex); return 0; out_unlock: - mutex_unlock(&cfg80211_mutex); rtnl_unlock(); return err; } static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) { - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -1568,7 +1530,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr **tb = nl80211_fam.attrbuf; int res; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, nl80211_fam.maxattr, nl80211_policy); if (res == 0) { @@ -1582,10 +1544,8 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) int ifidx = 
nla_get_u32(tb[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) { - mutex_unlock(&cfg80211_mutex); + if (!netdev) return -ENODEV; - } if (netdev->ieee80211_ptr) { dev = wiphy_to_dev( netdev->ieee80211_ptr->wiphy); @@ -1629,7 +1589,6 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) !skb->len && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; - mutex_unlock(&cfg80211_mutex); return 1; } idx--; @@ -1638,7 +1597,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) } while (cb->args[1] > 0); break; } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); cb->args[0] = idx; @@ -1793,7 +1752,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, if (result) return result; - mutex_lock(&rdev->devlist_mtx); switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: @@ -1817,7 +1775,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, default: result = -EINVAL; } - mutex_unlock(&rdev->devlist_mtx); return result; } @@ -1866,6 +1823,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 frag_threshold = 0, rts_threshold = 0; u8 coverage_class = 0; + ASSERT_RTNL(); + /* * Try to find the wiphy and netdev. Normally this * function shouldn't need the netdev, but this is @@ -1875,31 +1834,25 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) * also passed a netdev to set_wiphy, so that it is * possible to let that go to the right netdev! 
*/ - mutex_lock(&cfg80211_mutex); if (info->attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(genl_info_net(info), ifindex); - if (netdev && netdev->ieee80211_ptr) { + if (netdev && netdev->ieee80211_ptr) rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else + else netdev = NULL; } if (!netdev) { rdev = __cfg80211_rdev_from_attrs(genl_info_net(info), info->attrs); - if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); + if (IS_ERR(rdev)) return PTR_ERR(rdev); - } wdev = NULL; netdev = NULL; result = 0; - - mutex_lock(&rdev->mtx); } else wdev = netdev->ieee80211_ptr; @@ -1912,8 +1865,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = cfg80211_dev_rename( rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); - mutex_unlock(&cfg80211_mutex); - if (result) goto bad_res; @@ -2120,7 +2071,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } bad_res: - mutex_unlock(&rdev->mtx); if (netdev) dev_put(netdev); return result; @@ -2218,7 +2168,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; @@ -2228,7 +2178,6 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * } if_idx = 0; - mutex_lock(&rdev->devlist_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (if_idx < if_start) { if_idx++; @@ -2237,17 +2186,15 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { - mutex_unlock(&rdev->devlist_mtx); goto out; } if_idx++; } - mutex_unlock(&rdev->devlist_mtx); wp_idx++; } out: - 
mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); cb->args[0] = wp_idx; cb->args[1] = if_idx; @@ -2480,11 +2427,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - mutex_lock(&rdev->devlist_mtx); wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; - mutex_unlock(&rdev->devlist_mtx); break; default: break; @@ -2993,8 +2938,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; bool ret = false; - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) @@ -3008,8 +2951,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, break; } - mutex_unlock(&rdev->devlist_mtx); - return ret; } @@ -3171,13 +3112,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.radar_required = true; } - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, params.chandef.chan, CHAN_MODE_SHARED, radar_detect_width); - mutex_unlock(&rdev->devlist_mtx); - if (err) return err; @@ -4914,18 +4852,13 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) void *hdr = NULL; struct nlattr *nl_reg_rules; unsigned int i; - int err = -EINVAL; - - mutex_lock(&cfg80211_mutex); if (!cfg80211_regdomain) - goto out; + return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - err = -ENOBUFS; - goto out; - } + if (!msg) + return -ENOBUFS; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); @@ -4984,8 +4917,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_nest_end(msg, nl_reg_rules); genlmsg_end(msg, hdr); - err = genlmsg_reply(msg, info); - goto out; + return 
genlmsg_reply(msg, info); nla_put_failure_rcu: rcu_read_unlock(); @@ -4993,10 +4925,7 @@ nla_put_failure: genlmsg_cancel(msg, hdr); put_failure: nlmsg_free(msg); - err = -EMSGSIZE; -out: - mutex_unlock(&cfg80211_mutex); - return err; + return -EMSGSIZE; } static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) @@ -5062,12 +4991,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - mutex_lock(&cfg80211_mutex); - r = set_regdom(rd); /* set_regdom took ownership */ rd = NULL; - mutex_unlock(&cfg80211_mutex); bad_reg: kfree(rd); @@ -5117,7 +5043,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto unlock; @@ -5303,7 +5228,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } unlock: - mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -5375,8 +5299,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (ie_len > wiphy->max_sched_scan_ie_len) return -EINVAL; - mutex_lock(&rdev->sched_scan_mtx); - if (rdev->sched_scan_req) { err = -EINPROGRESS; goto out; @@ -5544,7 +5466,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, out_free: kfree(request); out: - mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -5552,17 +5473,12 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || !rdev->ops->sched_scan_stop) return -EOPNOTSUPP; - mutex_lock(&rdev->sched_scan_mtx); - err = __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); - - return err; + return __cfg80211_stop_sched_scan(rdev, false); } static int nl80211_start_radar_detection(struct sk_buff *skb, @@ -5594,12 +5510,11 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if 
(!rdev->ops->start_radar_detection) return -EOPNOTSUPP; - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, chandef.chan, CHAN_MODE_SHARED, BIT(chandef.width)); if (err) - goto err_locked; + return err; err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); if (!err) { @@ -5607,9 +5522,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->cac_started = true; wdev->cac_start_time = jiffies; } -err_locked: - mutex_unlock(&rdev->devlist_mtx); - return err; } @@ -6472,6 +6384,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, void *data = NULL; int data_len = 0; + rtnl_lock(); + if (cb->args[0]) { /* * 0 is a valid index, but not valid for args[0], @@ -6483,18 +6397,16 @@ static int nl80211_testmode_dump(struct sk_buff *skb, nl80211_fam.attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - return err; + goto out_err; - mutex_lock(&cfg80211_mutex); rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); - return PTR_ERR(rdev); + err = PTR_ERR(rdev); + goto out_err; } phy_idx = rdev->wiphy_idx; rdev = NULL; - mutex_unlock(&cfg80211_mutex); if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) cb->args[1] = @@ -6506,14 +6418,11 @@ static int nl80211_testmode_dump(struct sk_buff *skb, data_len = nla_len((void *)cb->args[1]); } - mutex_lock(&cfg80211_mutex); rdev = cfg80211_rdev_by_wiphy_idx(phy_idx); if (!rdev) { - mutex_unlock(&cfg80211_mutex); - return -ENOENT; + err = -ENOENT; + goto out_err; } - cfg80211_lock_rdev(rdev); - mutex_unlock(&cfg80211_mutex); if (!rdev->ops->testmode_dump) { err = -EOPNOTSUPP; @@ -6554,7 +6463,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, /* see above */ cb->args[0] = phy_idx + 1; out_err: - cfg80211_unlock_rdev(rdev); + rtnl_unlock(); return err; } @@ -8189,9 +8098,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->p2p_started) 
return 0; - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_add_interface(rdev, wdev->iftype); - mutex_unlock(&rdev->devlist_mtx); if (err) return err; @@ -8200,9 +8107,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) return err; wdev->p2p_started = true; - mutex_lock(&rdev->devlist_mtx); rdev->opencount++; - mutex_unlock(&rdev->devlist_mtx); return 0; } @@ -8218,11 +8123,7 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); cfg80211_stop_p2p_device(rdev, wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); return 0; } @@ -8365,11 +8266,11 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV || ops->internal_flags & NL80211_FLAG_NEED_WDEV) { - mutex_lock(&cfg80211_mutex); + ASSERT_RTNL(); + wdev = __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return PTR_ERR(wdev); @@ -8380,7 +8281,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -EINVAL; @@ -8394,7 +8294,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, if (dev) { if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !netif_running(dev)) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; @@ -8403,17 +8302,12 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, dev_hold(dev); } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { if (!wdev->p2p_started) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; } } - 
cfg80211_lock_rdev(rdev); - - mutex_unlock(&cfg80211_mutex); - info->user_ptr[0] = rdev; } @@ -8423,8 +8317,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - if (info->user_ptr[0]) - cfg80211_unlock_rdev(info->user_ptr[0]); if (info->user_ptr[1]) { if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; @@ -8446,7 +8338,8 @@ static struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_wiphy, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_WIPHY, @@ -8461,7 +8354,8 @@ static struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_interface, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WDEV, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_INTERFACE, @@ -8620,6 +8514,7 @@ static struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_REG, .doit = nl80211_get_reg, .policy = nl80211_policy, + .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ }, { @@ -8627,6 +8522,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_set_reg, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_REQ_SET_REG, @@ -9082,8 +8978,6 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - lockdep_assert_held(&rdev->sched_scan_mtx); - if (WARN_ON(!req)) return 0; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index cc35fbaa457..e7655961858 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -377,7 +377,7 @@ static void reg_regdb_search(struct work_struct *work) const struct ieee80211_regdomain 
*curdom, *regdom = NULL; int i; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); mutex_lock(&reg_regdb_search_mutex); while (!list_empty(&reg_regdb_search_list)) { @@ -402,7 +402,7 @@ static void reg_regdb_search(struct work_struct *work) if (!IS_ERR_OR_NULL(regdom)) set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); @@ -1225,7 +1225,7 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; - assert_cfg80211_lock(); + ASSERT_RTNL(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; @@ -1570,21 +1570,19 @@ static void reg_process_pending_hints(void) { struct regulatory_request *reg_request, *lr; - mutex_lock(&cfg80211_mutex); - mutex_lock(&reg_mutex); lr = get_last_request(); /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); - goto out; + return; } spin_lock(&reg_requests_lock); if (list_empty(&reg_requests_list)) { spin_unlock(&reg_requests_lock); - goto out; + return; } reg_request = list_first_entry(&reg_requests_list, @@ -1595,10 +1593,6 @@ static void reg_process_pending_hints(void) spin_unlock(&reg_requests_lock); reg_process_hint(reg_request, reg_request->initiator); - -out: - mutex_unlock(&reg_mutex); - mutex_unlock(&cfg80211_mutex); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -1607,9 +1601,6 @@ static void reg_process_pending_beacon_hints(void) struct cfg80211_registered_device *rdev; struct reg_beacon *pending_beacon, *tmp; - mutex_lock(&cfg80211_mutex); - mutex_lock(&reg_mutex); - /* This goes through the _pending_ beacon list */ spin_lock_bh(&reg_pending_beacons_lock); @@ -1626,14 +1617,16 @@ static void reg_process_pending_beacon_hints(void) } spin_unlock_bh(&reg_pending_beacons_lock); - mutex_unlock(&reg_mutex); - mutex_unlock(&cfg80211_mutex);
} static void reg_todo(struct work_struct *work) { + rtnl_lock(); + mutex_lock(&reg_mutex); reg_process_pending_hints(); reg_process_pending_beacon_hints(); + mutex_unlock(&reg_mutex); + rtnl_unlock(); } static void queue_regulatory_request(struct regulatory_request *request) @@ -1717,10 +1710,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) } EXPORT_SYMBOL(regulatory_hint); -/* - * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and - * therefore cannot iterate over the rdev list here. - */ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, const u8 *country_ie, u8 country_ie_len) { @@ -1752,7 +1741,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, /* * We will run this only upon a successful connection on cfg80211. * We leave conflict resolution to the workqueue, where can hold - * cfg80211_mutex. + * the RTNL. */ if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && lr->wiphy_idx != WIPHY_IDX_INVALID) @@ -1858,7 +1847,8 @@ static void restore_regulatory_settings(bool reset_user) LIST_HEAD(tmp_reg_req_list); struct cfg80211_registered_device *rdev; - mutex_lock(&cfg80211_mutex); + ASSERT_RTNL(); + mutex_lock(&reg_mutex); reset_regdomains(true, &world_regdom); @@ -1915,7 +1905,6 @@ static void restore_regulatory_settings(bool reset_user) spin_unlock(&reg_requests_lock); mutex_unlock(&reg_mutex); - mutex_unlock(&cfg80211_mutex); REG_DBG_PRINT("Kicking the queue\n"); @@ -2297,7 +2286,6 @@ void wiphy_regulatory_register(struct wiphy *wiphy) mutex_unlock(&reg_mutex); } -/* Caller must hold cfg80211_mutex */ void wiphy_regulatory_deregister(struct wiphy *wiphy) { struct wiphy *request_wiphy = NULL; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2ce44a712f1..dd01b58fa78 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif -
lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); request = rdev->scan_req; @@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); ___cfg80211_scan_done(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -241,6 +241,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); request->aborted = aborted; + request->notified = true; queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -255,7 +256,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) request = rdev->sched_scan_req; - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { @@ -270,7 +271,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) nl80211_send_sched_scan_results(rdev, request->dev); } - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } void cfg80211_sched_scan_results(struct wiphy *wiphy) @@ -289,9 +290,9 @@ void cfg80211_sched_scan_stopped(struct wiphy *wiphy) trace_cfg80211_sched_scan_stopped(wiphy); - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -300,7 +301,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, { struct net_device *dev; - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); if (!rdev->sched_scan_req) return -ENOENT; @@ -1043,21 +1044,19 @@ EXPORT_SYMBOL(cfg80211_unlink_bss); static struct cfg80211_registered_device * cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) { - struct cfg80211_registered_device 
*rdev = ERR_PTR(-ENODEV); + struct cfg80211_registered_device *rdev; struct net_device *dev; - mutex_lock(&cfg80211_mutex); + ASSERT_RTNL(); + dev = dev_get_by_index(net, ifindex); if (!dev) - goto out; - if (dev->ieee80211_ptr) { + return ERR_PTR(-ENODEV); + if (dev->ieee80211_ptr) rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else + else rdev = ERR_PTR(-ENODEV); dev_put(dev); - out: - mutex_unlock(&cfg80211_mutex); return rdev; } @@ -1083,7 +1082,6 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1190,9 +1188,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: - mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); - cfg80211_unlock_rdev(rdev); return err; } EXPORT_SYMBOL_GPL(cfg80211_wext_siwscan); @@ -1491,10 +1487,8 @@ int cfg80211_wext_giwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - if (rdev->scan_req) { - res = -EAGAIN; - goto out; - } + if (rdev->scan_req) + return -EAGAIN; res = ieee80211_scan_results(rdev, info, extra, data->length); data->length = 0; @@ -1503,8 +1497,6 @@ int cfg80211_wext_giwscan(struct net_device *dev, res = 0; } - out: - cfg80211_unlock_rdev(rdev); return res; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwscan); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 3ed35c345ca..4dbf31407a5 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -43,35 +43,29 @@ static bool cfg80211_is_all_idle(void) struct wireless_dev *wdev; bool is_all_idle = true; - mutex_lock(&cfg80211_mutex); - /* * All devices must be idle as otherwise if you are actively * scanning some new beacon hints could be learned and would * count as new regulatory hints. 
*/ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - cfg80211_lock_rdev(rdev); list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) is_all_idle = false; wdev_unlock(wdev); } - cfg80211_unlock_rdev(rdev); } - mutex_unlock(&cfg80211_mutex); - return is_all_idle; } static void disconnect_work(struct work_struct *work) { - if (!cfg80211_is_all_idle()) - return; - - regulatory_hint_disconnect(); + rtnl_lock(); + if (cfg80211_is_all_idle()) + regulatory_hint_disconnect(); + rtnl_unlock(); } static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); @@ -85,7 +79,6 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); - lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -226,9 +219,6 @@ void cfg80211_conn_work(struct work_struct *work) u8 bssid_buf[ETH_ALEN], *bssid = NULL; rtnl_lock(); - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->netdev) @@ -256,9 +246,6 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); } - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -931,14 +918,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, { int err; - mutex_lock(&rdev->devlist_mtx); - /* might request scan - scan_mtx -> wdev_mtx dependency */ - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 0962f107f57..501724257af 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -808,12 +808,8 @@ void cfg80211_process_rdev_events(struct 
cfg80211_registered_device *rdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) cfg80211_process_wdev_events(wdev); - - mutex_unlock(&rdev->devlist_mtx); } int cfg80211_change_iface(struct cfg80211_registered_device *rdev, @@ -845,10 +841,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype && netif_running(dev)) { - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, ntype); - mutex_unlock(&rdev->devlist_mtx); if (err) return err; @@ -1210,8 +1204,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, if (!beacon_int) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->beacon_interval) continue; @@ -1221,8 +1213,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, } } - mutex_unlock(&rdev->devlist_mtx); - return res; } @@ -1246,7 +1236,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, int i, j; ASSERT_RTNL(); - lockdep_assert_held(&rdev->devlist_mtx); if (WARN_ON(hweight32(radar_detect) > 1)) return -EINVAL; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index d997d0f0c54..e7c6e862580 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -72,7 +72,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; - int ret; rdev = wiphy_to_dev(wdev->wiphy); @@ -98,11 +97,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, memset(&vifparams, 0, sizeof(vifparams)); - cfg80211_lock_rdev(rdev); - ret = cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); - cfg80211_unlock_rdev(rdev); - - return ret; + return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); 
} EXPORT_SYMBOL_GPL(cfg80211_wext_siwmode); @@ -579,13 +574,10 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, { int err; - /* devlist mutex needed for possible IBSS re-join */ - mutex_lock(&rdev->devlist_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_set_encryption(rdev, dev, pairwise, addr, remove, tx_key, idx, params); wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -787,7 +779,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; - int freq, err; + int freq; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -804,10 +796,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - err = cfg80211_set_monitor_channel(rdev, &chandef); - mutex_unlock(&rdev->devlist_mtx); - return err; + return cfg80211_set_monitor_channel(rdev, &chandef); case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); if (freq < 0) @@ -818,10 +807,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - err = cfg80211_set_mesh_channel(rdev, wdev, &chandef); - mutex_unlock(&rdev->devlist_mtx); - return err; + return cfg80211_set_mesh_channel(rdev, wdev, &chandef); default: return -EOPNOTSUPP; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index e79cb5c0655..aeefd681718 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -87,9 +87,6 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, return -EINVAL; } - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -136,9 +133,6 @@ int 
cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -190,9 +184,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; @@ -226,9 +217,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -287,9 +275,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) { @@ -318,9 +303,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } -- cgit v1.2.3 From 8d61ffa5e01c5f676431d12caba17db164a48a86 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2013 12:32:47 +0200 Subject: cfg80211/mac80211: use cfg80211 wdev mutex in mac80211 Using separate locks in cfg80211 and mac80211 has always caused issues, for example having to unlock in places in mac80211 to call cfg80211, which even needed a framework to make cfg80211 calls after some functions returned etc. 
Additionally, I suspect some issues people have reported with the cfg80211 state getting confused could be due to such issues, when cfg80211 is asking mac80211 to change state but mac80211 is in the process of telling cfg80211 that the state changed (in another way.) Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 4 +- net/mac80211/debugfs_netdev.c | 10 +- net/mac80211/ht.c | 4 +- net/mac80211/ibss.c | 39 ++--- net/mac80211/ieee80211_i.h | 25 +++- net/mac80211/main.c | 4 +- net/mac80211/mesh.c | 32 ++-- net/mac80211/mesh_plink.c | 7 +- net/mac80211/mlme.c | 341 +++++++++++++++--------------------------- net/mac80211/util.c | 4 +- net/wireless/mlme.c | 48 +----- net/wireless/trace.h | 4 +- 12 files changed, 197 insertions(+), 325 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index eb421905104..232edf78d5a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2318,7 +2318,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode old_req; int err; - lockdep_assert_held(&sdata->u.mgd.mtx); + lockdep_assert_held(&sdata->wdev.mtx); old_req = sdata->u.mgd.req_smps; sdata->u.mgd.req_smps = smps_mode; @@ -2375,9 +2375,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ - mutex_lock(&sdata->u.mgd.mtx); __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); - mutex_unlock(&sdata->u.mgd.mtx); if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index f83074fe667..cafe614ef93 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -228,9 +228,9 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; - 
mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); err = __ieee80211_request_smps(sdata, smps_mode); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); return err; } @@ -313,16 +313,16 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); if (!sdata->u.mgd.associated) { - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); dev_kfree_skb(skb); return -ENOTCONN; } memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, addr, ETH_ALEN); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); break; default: dev_kfree_skb(skb); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index af8cee06e4f..75dff338f58 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -429,9 +429,9 @@ void ieee80211_request_smps_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.request_smps_work); - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); } void ieee80211_request_smps(struct ieee80211_vif *vif, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 956ba6316da..caa4b4f7f6e 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -54,7 +54,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; int frame_len; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); /* Reset own TSF to allow time synchronization work. 
*/ drv_reset_tsf(local, sdata); @@ -74,7 +74,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&ifibss->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifibss->presp, NULL); if (presp) kfree_rcu(presp, rcu_head); @@ -263,7 +263,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, const struct cfg80211_bss_ies *ies; u64 tsf; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); if (beacon_int < 10) beacon_int = 10; @@ -410,7 +410,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); if (len < 24 + 6) return; @@ -677,7 +677,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) int active = 0; struct sta_info *sta; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); rcu_read_lock(); @@ -703,7 +703,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); @@ -734,7 +734,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) u16 capability; int i; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); @@ -777,7 +777,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) int active_ibss; u16 capability; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); active_ibss = ieee80211_sta_active_ibss(sdata); ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); @@ -847,10 +847,10 @@ static void ieee80211_rx_mgmt_probe_req(struct 
ieee80211_sub_if_data *sdata, struct beacon_data *presp; u8 *pos, *end; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&ifibss->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || len < 24 + 2 || !presp) @@ -934,7 +934,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - mutex_lock(&sdata->u.ibss.mtx); + sdata_lock(sdata); if (!sdata->u.ibss.ssid_len) goto mgmt_out; /* not ready to merge yet */ @@ -957,7 +957,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } mgmt_out: - mutex_unlock(&sdata->u.ibss.mtx); + sdata_unlock(sdata); } void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) @@ -965,7 +965,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct sta_info *sta; - mutex_lock(&ifibss->mtx); + sdata_lock(sdata); /* * Work could be scheduled after scan or similar @@ -1001,7 +1001,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) } out: - mutex_unlock(&ifibss->mtx); + sdata_unlock(sdata); } static void ieee80211_ibss_timer(unsigned long data) @@ -1018,7 +1018,6 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); - mutex_init(&ifibss->mtx); INIT_LIST_HEAD(&ifibss->incomplete_stations); spin_lock_init(&ifibss->incomplete_lock); } @@ -1045,8 +1044,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, { u32 changed = 0; - mutex_lock(&sdata->u.ibss.mtx); - if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); sdata->u.ibss.fixed_bssid = true; @@ -1079,8 +1076,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); 
sdata->u.ibss.ssid_len = params->ssid_len; - mutex_unlock(&sdata->u.ibss.mtx); - /* * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is * reserved, but an HT STA shall protect HT transmissions as though @@ -1116,8 +1111,6 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) struct sta_info *sta; struct beacon_data *presp; - mutex_lock(&sdata->u.ibss.mtx); - active_ibss = ieee80211_sta_active_ibss(sdata); if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { @@ -1161,7 +1154,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) /* remove beacon */ kfree(sdata->u.ibss.ie); presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&sdata->u.ibss.mtx)); + lockdep_is_held(&sdata->wdev.mtx)); RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; @@ -1177,7 +1170,5 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.ibss.timer); - mutex_unlock(&sdata->u.ibss.mtx); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ba3cd284d10..9eed6f1d161 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -394,7 +394,6 @@ struct ieee80211_if_managed { bool nullfunc_failed; bool connection_loss; - struct mutex mtx; struct cfg80211_bss *associated; struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; @@ -488,8 +487,6 @@ struct ieee80211_if_managed { struct ieee80211_if_ibss { struct timer_list timer; - struct mutex mtx; - unsigned long last_scan_completed; u32 basic_rates; @@ -580,8 +577,6 @@ struct ieee80211_if_mesh { bool accepting_plinks; int num_gates; struct beacon_data __rcu *beacon; - /* just protects beacon updates for now */ - struct mutex mtx; const u8 *ie; u8 ie_len; enum { @@ -778,6 +773,26 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } 
+static inline void sdata_lock(struct ieee80211_sub_if_data *sdata) + __acquires(&sdata->wdev.mtx) +{ + mutex_lock(&sdata->wdev.mtx); + __acquire(&sdata->wdev.mtx); +} + +static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata) + __releases(&sdata->wdev.mtx) +{ + mutex_unlock(&sdata->wdev.mtx); + __release(&sdata->wdev.mtx); +} + +static inline void +sdata_assert_lock(struct ieee80211_sub_if_data *sdata) +{ + lockdep_assert_held(&sdata->wdev.mtx); +} + static inline enum ieee80211_band ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8a7bfc47d57..1998f147526 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -331,7 +331,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, return NOTIFY_DONE; ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); /* Copy the addresses to the bss_conf list */ ifa = idev->ifa_list; @@ -349,7 +349,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return NOTIFY_DONE; } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c14bb816c6a..b3d1fdd4636 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,8 +161,11 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->plink_timer); } - if (changed) + if (changed) { + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); + } } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -577,7 +580,9 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + @@ -697,25 +702,21 @@ out_free: } static int 
-ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) +ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata) { struct beacon_data *old_bcn; int ret; - mutex_lock(&ifmsh->mtx); - - old_bcn = rcu_dereference_protected(ifmsh->beacon, - lockdep_is_held(&ifmsh->mtx)); - ret = ieee80211_mesh_build_beacon(ifmsh); + old_bcn = rcu_dereference_protected(sdata->u.mesh.beacon, + lockdep_is_held(&sdata->wdev.mtx)); + ret = ieee80211_mesh_build_beacon(&sdata->u.mesh); if (ret) /* just reuse old beacon */ - goto out; + return ret; if (old_bcn) kfree_rcu(old_bcn, rcu_head); -out: - mutex_unlock(&ifmsh->mtx); - return ret; + return 0; } void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, @@ -726,7 +727,7 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT))) - if (ieee80211_mesh_rebuild_beacon(&sdata->u.mesh)) + if (ieee80211_mesh_rebuild_beacon(sdata)) return; ieee80211_bss_info_change_notify(sdata, changed); } @@ -788,12 +789,12 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - mutex_lock(&ifmsh->mtx); + sdata_lock(sdata); bcn = rcu_dereference_protected(ifmsh->beacon, - lockdep_is_held(&ifmsh->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); - mutex_unlock(&ifmsh->mtx); + sdata_unlock(sdata); /* flush STAs and mpaths on this iface */ sta_info_flush(sdata); @@ -1041,7 +1042,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); RCU_INIT_POINTER(ifmsh->beacon, NULL); - mutex_init(&ifmsh->mtx); sdata->vif.bss_conf.bssid = zero_addr; } diff --git a/net/mac80211/mesh_plink.c 
b/net/mac80211/mesh_plink.c index 09bebed9941..6c4da99bc4f 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -517,7 +517,9 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); } static void mesh_plink_timer(unsigned long data) @@ -1068,6 +1070,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - if (changed) + if (changed) { + sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); + sdata_unlock(sdata); + } } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1da3d6be8e1..f44f4caa69e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -90,41 +90,6 @@ MODULE_PARM_DESC(probe_wait_ms, */ #define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 -/* - * All cfg80211 functions have to be called outside a locked - * section so that they can acquire a lock themselves... This - * is much simpler than queuing up things in cfg80211, but we - * do need some indirection for that here. 
- */ -enum rx_mgmt_action { - /* no action required */ - RX_MGMT_NONE, - - /* caller must call cfg80211_send_deauth() */ - RX_MGMT_CFG80211_DEAUTH, - - /* caller must call cfg80211_send_disassoc() */ - RX_MGMT_CFG80211_DISASSOC, - - /* caller must call cfg80211_send_rx_auth() */ - RX_MGMT_CFG80211_RX_AUTH, - - /* caller must call cfg80211_send_rx_assoc() */ - RX_MGMT_CFG80211_RX_ASSOC, - - /* caller must call cfg80211_send_assoc_timeout() */ - RX_MGMT_CFG80211_ASSOC_TIMEOUT, - - /* used when a processed beacon causes a deauth */ - RX_MGMT_CFG80211_TX_DEAUTH, -}; - -/* utils */ -static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) -{ - lockdep_assert_held(&ifmgd->mtx); -} - /* * We can have multiple work items (and connection probing) * scheduling this timer, but we need to take care to only @@ -135,13 +100,14 @@ static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) * has happened -- the work that runs from this timer will * do that. */ -static void run_again(struct ieee80211_if_managed *ifmgd, unsigned long timeout) +static void run_again(struct ieee80211_sub_if_data *sdata, + unsigned long timeout) { - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); - if (!timer_pending(&ifmgd->timer) || - time_before(timeout, ifmgd->timer.expires)) - mod_timer(&ifmgd->timer, timeout); + if (!timer_pending(&sdata->u.mgd.timer) || + time_before(timeout, sdata->u.mgd.timer.expires)) + mod_timer(&sdata->u.mgd.timer, timeout); } void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) @@ -652,7 +618,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_channel *chan; u32 rates = 0; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); @@ -962,7 +928,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ieee80211_sdata_running(sdata)) return; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if 
(!ifmgd->associated) goto out; @@ -985,7 +951,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) IEEE80211_QUEUE_STOP_REASON_CSA); out: ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) @@ -1036,7 +1002,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (!cbss) return; @@ -1845,7 +1811,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; u32 changed = 0; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (WARN_ON_ONCE(tx && !frame_buf)) return; @@ -2054,7 +2020,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) } ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); - run_again(ifmgd, ifmgd->probe_timeout); + run_again(sdata, ifmgd->probe_timeout); if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) ieee80211_flush_queues(sdata->local, sdata); } @@ -2068,7 +2034,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (!ieee80211_sdata_running(sdata)) return; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) goto out; @@ -2122,7 +2088,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, ifmgd->probe_send_count = 0; ieee80211_mgd_probe_ap_send(sdata); out: - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, @@ -2138,7 +2104,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) return NULL; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (ifmgd->associated) cbss = ifmgd->associated; @@ -2171,9 +2137,9 @@ static void 
__ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } @@ -2184,13 +2150,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - mutex_unlock(&ifmgd->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. - */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + sdata_unlock(sdata); } static void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -2257,7 +2219,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if (!assoc) { sta_info_destroy_addr(sdata, auth_data->bss->bssid); @@ -2298,27 +2260,26 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, auth_data->key_idx, tx_flags); } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 bssid[ETH_ALEN]; u16 auth_alg, auth_transaction, status_code; struct sta_info *sta; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 6) - return RX_MGMT_NONE; + return; if (!ifmgd->auth_data || ifmgd->auth_data->done) - return RX_MGMT_NONE; + return; memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN); if (!ether_addr_equal(bssid, mgmt->bssid)) - return RX_MGMT_NONE; + return; auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); 
auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); @@ -2330,14 +2291,15 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, mgmt->sa, auth_alg, ifmgd->auth_data->algorithm, auth_transaction, ifmgd->auth_data->expected_transaction); - return RX_MGMT_NONE; + return; } if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; } switch (ifmgd->auth_data->algorithm) { @@ -2350,20 +2312,20 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->auth_data->expected_transaction != 4) { ieee80211_auth_challenge(sdata, mgmt, len); /* need another frame */ - return RX_MGMT_NONE; + return; } break; default: WARN_ONCE(1, "invalid auth alg %d", ifmgd->auth_data->algorithm); - return RX_MGMT_NONE; + return; } sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && ifmgd->auth_data->expected_transaction != 2) { @@ -2371,7 +2333,8 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, * Report auth frame to user space for processing since another * round of Authentication frames is still needed. 
*/ - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; } /* move station state to auth */ @@ -2387,30 +2350,29 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&sdata->local->sta_mtx); - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + return; out_err: mutex_unlock(&sdata->local->sta_mtx); /* ignore frame -- wait for timeout */ - return RX_MGMT_NONE; } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *bssid = NULL; u16 reason_code; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 2) - return RX_MGMT_NONE; + return; if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; bssid = ifmgd->associated->bssid; @@ -2421,25 +2383,24 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - return RX_MGMT_CFG80211_DEAUTH; + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, len); } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 2) - return RX_MGMT_NONE; + return; if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -2448,7 +2409,7 @@ 
ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - return RX_MGMT_CFG80211_DISASSOC; + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, len); } static void ieee80211_get_rates(struct ieee80211_supported_band *sband, @@ -2498,7 +2459,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if (!assoc) { sta_info_destroy_addr(sdata, assoc_data->bss->bssid); @@ -2679,10 +2640,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, return true; } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - struct cfg80211_bss **bss) +static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data; @@ -2690,13 +2650,14 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; u8 *pos; bool reassoc; + struct cfg80211_bss *bss; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (!assoc_data) - return RX_MGMT_NONE; + return; if (!ether_addr_equal(assoc_data->bss->bssid, mgmt->bssid)) - return RX_MGMT_NONE; + return; /* * AssocResp and ReassocResp have identical structure, so process both @@ -2704,7 +2665,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, */ if (len < 24 + 6) - return RX_MGMT_NONE; + return; reassoc = ieee80211_is_reassoc_req(mgmt->frame_control); capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); @@ -2731,22 +2692,23 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, assoc_data->timeout = jiffies + msecs_to_jiffies(ms); 
assoc_data->timeout_started = true; if (ms > IEEE80211_ASSOC_TIMEOUT) - run_again(ifmgd, assoc_data->timeout); - return RX_MGMT_NONE; + run_again(sdata, assoc_data->timeout); + return; } - *bss = assoc_data->bss; + bss = assoc_data->bss; if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); } else { - if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { + if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(sdata->local->hw.wiphy, *bss); - return RX_MGMT_CFG80211_ASSOC_TIMEOUT; + cfg80211_put_bss(sdata->local->hw.wiphy, bss); + cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); + return; } sdata_info(sdata, "associated\n"); @@ -2758,7 +2720,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_destroy_assoc_data(sdata, true); } - return RX_MGMT_CFG80211_RX_ASSOC; + cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, len); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -2772,7 +2734,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel; bool need_ps = false; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if ((sdata->u.mgd.associated && ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || @@ -2831,7 +2793,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ifmgd = &sdata->u.mgd; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (!ether_addr_equal(mgmt->da, sdata->vif.addr)) return; /* ignore ProbeResp to foreign address */ @@ -2856,7 +2818,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, 
ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); } } @@ -2881,10 +2843,9 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_HT_CAPABILITY) | (1ULL << WLAN_EID_HT_OPERATION); -static enum rx_mgmt_action -ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - u8 *deauth_buf, struct ieee80211_rx_status *rx_status) +static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; @@ -2899,24 +2860,25 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, u8 erp_value = 0; u32 ncrc; u8 *bssid; + u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; if (baselen > len) - return RX_MGMT_NONE; + return; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); - return RX_MGMT_NONE; + return; } if (rx_status->freq != chanctx_conf->def.chan->center_freq) { rcu_read_unlock(); - return RX_MGMT_NONE; + return; } chan = chanctx_conf->def.chan; rcu_read_unlock(); @@ -2943,13 +2905,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; - run_again(ifmgd, ifmgd->assoc_data->timeout); - return RX_MGMT_NONE; + run_again(sdata, ifmgd->assoc_data->timeout); + return; } if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; bssid = ifmgd->associated->bssid; /* Track average RSSI from the Beacon frames of the current AP */ @@ -3095,7 +3057,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if 
(ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) - return RX_MGMT_NONE; + return; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; @@ -3151,7 +3113,9 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); - return RX_MGMT_CFG80211_TX_DEAUTH; + cfg80211_send_deauth(sdata->dev, deauth_buf, + sizeof(deauth_buf)); + return; } if (sta && elems.opmode_notif) @@ -3168,19 +3132,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); - - return RX_MGMT_NONE; } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; - struct cfg80211_bss *bss = NULL; - enum rx_mgmt_action rma = RX_MGMT_NONE; - u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; u16 fc; struct ieee802_11_elems elems; int ies_len; @@ -3189,28 +3147,27 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_BEACON: - rma = ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - deauth_buf, rx_status); + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, skb); break; case IEEE80211_STYPE_AUTH: - rma = ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DEAUTH: - rma = ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DISASSOC: - rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_disassoc(sdata, mgmt, 
skb->len); break; case IEEE80211_STYPE_ASSOC_RESP: case IEEE80211_STYPE_REASSOC_RESP: - rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss); + ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { @@ -3256,34 +3213,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } break; } - mutex_unlock(&ifmgd->mtx); - - switch (rma) { - case RX_MGMT_NONE: - /* no action */ - break; - case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_DISASSOC: - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_RX_AUTH: - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_RX_ASSOC: - cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_ASSOC_TIMEOUT: - cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); - break; - case RX_MGMT_CFG80211_TX_DEAUTH: - cfg80211_send_deauth(sdata->dev, deauth_buf, - sizeof(deauth_buf)); - break; - default: - WARN(1, "unexpected: %d", rma); - } + sdata_unlock(sdata); } static void ieee80211_sta_timer(unsigned long data) @@ -3297,20 +3227,12 @@ static void ieee80211_sta_timer(unsigned long data) static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, u8 *bssid, u8 reason, bool tx) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, tx, frame_buf); - mutex_unlock(&ifmgd->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. 
- */ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - - mutex_lock(&ifmgd->mtx); } static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) @@ -3320,7 +3242,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; u32 tx_flags = 0; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; @@ -3393,7 +3315,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, auth_data->timeout); + run_again(sdata, auth_data->timeout); } else { auth_data->timeout_started = false; } @@ -3406,7 +3328,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; struct ieee80211_local *local = sdata->local; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); assoc_data->tries++; if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) { @@ -3430,7 +3352,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; assoc_data->timeout_started = true; - run_again(&sdata->u.mgd, assoc_data->timeout); + run_again(sdata, assoc_data->timeout); } else { assoc_data->timeout_started = false; } @@ -3455,7 +3377,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (ifmgd->status_received) { __le16 fc = ifmgd->status_fc; @@ -3467,7 +3389,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (status_acked) { ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; - 
run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); } else { ifmgd->auth_data->timeout = jiffies - 1; } @@ -3478,7 +3400,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (status_acked) { ifmgd->assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT; - run_again(ifmgd, ifmgd->assoc_data->timeout); + run_again(sdata, ifmgd->assoc_data->timeout); } else { ifmgd->assoc_data->timeout = jiffies - 1; } @@ -3501,12 +3423,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_auth_data(sdata, false); - mutex_unlock(&ifmgd->mtx); cfg80211_send_auth_timeout(sdata->dev, bssid); - mutex_lock(&ifmgd->mtx); } } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { @@ -3519,12 +3439,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_assoc_data(sdata, false); - mutex_unlock(&ifmgd->mtx); cfg80211_send_assoc_timeout(sdata->dev, bssid); - mutex_lock(&ifmgd->mtx); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) - run_again(ifmgd, ifmgd->assoc_data->timeout); + run_again(sdata, ifmgd->assoc_data->timeout); if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL) && @@ -3558,7 +3476,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) false); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) - run_again(ifmgd, ifmgd->probe_timeout); + run_again(sdata, ifmgd->probe_timeout); else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { mlme_dbg(sdata, "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", @@ -3587,7 +3505,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) } } - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } static void 
ieee80211_sta_bcn_mon_timer(unsigned long data) @@ -3648,9 +3566,9 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } @@ -3661,10 +3579,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) ifmgd->associated->bssid, WLAN_REASON_UNSPECIFIED, true); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } #endif @@ -3696,8 +3614,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len; ifmgd->p2p_noa_index = -1; - mutex_init(&ifmgd->mtx); - if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC; else @@ -4053,8 +3969,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* try to authenticate/probe */ - mutex_lock(&ifmgd->mtx); - if ((ifmgd->auth_data && !ifmgd->auth_data->done) || ifmgd->assoc_data) { err = -EBUSY; @@ -4074,8 +3988,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - __cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4092,8 +4006,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* hold our own reference */ cfg80211_ref_bss(local->hw.wiphy, auth_data->bss); - err = 0; - goto out_unlock; + return 0; err_clear: memset(ifmgd->bssid, 0, ETH_ALEN); @@ -4101,9 +4014,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, ifmgd->auth_data = NULL; err_free: kfree(auth_data); - out_unlock: - mutex_unlock(&ifmgd->mtx); - return err; } @@ -4134,8 +4044,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data 
*sdata, assoc_data->ssid_len = ssidie[1]; rcu_read_unlock(); - mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -4143,8 +4051,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - __cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_send_deauth(sdata->dev, frame_buf, + sizeof(frame_buf)); } if (ifmgd->auth_data && !ifmgd->auth_data->done) { @@ -4338,7 +4246,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } rcu_read_unlock(); - run_again(ifmgd, assoc_data->timeout); + run_again(sdata, assoc_data->timeout); if (bss->corrupt_data) { char *corrupt_type = "data"; @@ -4354,17 +4262,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, corrupt_type); } - err = 0; - goto out; + return 0; err_clear: memset(ifmgd->bssid, 0, ETH_ALEN); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->assoc_data = NULL; err_free: kfree(assoc_data); - out: - mutex_unlock(&ifmgd->mtx); - return err; } @@ -4376,8 +4280,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, bool tx = !req->local_state_change; bool report_frame = false; - mutex_lock(&ifmgd->mtx); - sdata_info(sdata, "deauthenticating from %pM by local choice (reason=%d)\n", req->bssid, req->reason_code); @@ -4389,7 +4291,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); ieee80211_destroy_auth_data(sdata, false); - mutex_unlock(&ifmgd->mtx); report_frame = true; goto out; @@ -4401,12 +4302,11 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); report_frame = true; } - mutex_unlock(&ifmgd->mtx); out: if (report_frame) - __cfg80211_send_deauth(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_send_deauth(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4418,18 +4318,14 @@ int ieee80211_mgd_disassoc(struct 
ieee80211_sub_if_data *sdata, u8 bssid[ETH_ALEN]; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - mutex_lock(&ifmgd->mtx); - /* * cfg80211 should catch this ... but it's racy since * we can receive a disassoc frame, process it, hand it * to cfg80211 while that's in a locked section already * trying to tell us that the user wants to disconnect. */ - if (ifmgd->associated != req->bss) { - mutex_unlock(&ifmgd->mtx); + if (ifmgd->associated != req->bss) return -ENOLINK; - } sdata_info(sdata, "disassociating from %pM by local choice (reason=%d)\n", @@ -4439,10 +4335,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC, req->reason_code, !req->local_state_change, frame_buf); - mutex_unlock(&ifmgd->mtx); - __cfg80211_send_disassoc(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_send_disassoc(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4462,13 +4357,13 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->csa_connection_drop_work); cancel_work_sync(&ifmgd->chswitch_work); - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, false); if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); del_timer_sync(&ifmgd->timer); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ffdfe4bc89a..2a8d759324c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1581,9 +1581,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->u.mgd.dtim_period) changed |= BSS_CHANGED_DTIM_PERIOD; - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); ieee80211_bss_info_change_notify(sdata, changed); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); break; case NL80211_IFTYPE_ADHOC: changed |= BSS_CHANGED_IBSS; diff --git a/net/wireless/mlme.c 
b/net/wireless/mlme.c index 68b40f21bc3..80ffb013891 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -25,12 +25,9 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_rx_auth(dev); - wdev_lock(wdev); nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); cfg80211_sme_rx_auth(dev, buf, len); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_auth); @@ -46,7 +43,6 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); trace_cfg80211_send_rx_assoc(dev, bss); - wdev_lock(wdev); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); @@ -59,7 +55,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && cfg80211_sme_failed_reassoc(wdev)) { cfg80211_put_bss(wiphy, bss); - goto out; + return; } nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); @@ -71,7 +67,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, * sme will schedule work that does it later. 
*/ cfg80211_put_bss(wiphy, bss); - goto out; + return; } if (!wdev->conn && wdev->sme_state == CFG80211_SME_IDLE) { @@ -87,13 +83,11 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, status_code, status_code == WLAN_STATUS_SUCCESS, bss); - out: - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -void __cfg80211_send_deauth(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_send_deauth(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -102,7 +96,7 @@ void __cfg80211_send_deauth(struct net_device *dev, const u8 *bssid = mgmt->bssid; bool was_current = false; - trace___cfg80211_send_deauth(dev); + trace_cfg80211_send_deauth(dev); ASSERT_WDEV_LOCK(wdev); if (wdev->current_bss && @@ -129,20 +123,10 @@ void __cfg80211_send_deauth(struct net_device *dev, false, NULL); } } -EXPORT_SYMBOL(__cfg80211_send_deauth); - -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_send_deauth(dev, buf, len); - wdev_unlock(wdev); -} EXPORT_SYMBOL(cfg80211_send_deauth); -void __cfg80211_send_disassoc(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_send_disassoc(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -152,7 +136,7 @@ void __cfg80211_send_disassoc(struct net_device *dev, u16 reason_code; bool from_ap; - trace___cfg80211_send_disassoc(dev); + trace_cfg80211_send_disassoc(dev); ASSERT_WDEV_LOCK(wdev); nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); @@ -175,16 +159,6 @@ void __cfg80211_send_disassoc(struct net_device *dev, from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); } 
-EXPORT_SYMBOL(__cfg80211_send_disassoc); - -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_send_disassoc(dev, buf, len); - wdev_unlock(wdev); -} EXPORT_SYMBOL(cfg80211_send_disassoc); void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) @@ -194,15 +168,12 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_auth_timeout(dev, addr); - wdev_lock(wdev); nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_auth_timeout); @@ -213,15 +184,12 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); trace_cfg80211_send_assoc_timeout(dev, addr); - wdev_lock(wdev); nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - - wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_assoc_timeout); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5755bc14abb..23fafeae8a1 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1911,12 +1911,12 @@ TRACE_EVENT(cfg80211_send_rx_assoc, NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG) ); -DEFINE_EVENT(netdev_evt_only, __cfg80211_send_deauth, +DEFINE_EVENT(netdev_evt_only, cfg80211_send_deauth, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev) ); -DEFINE_EVENT(netdev_evt_only, __cfg80211_send_disassoc, +DEFINE_EVENT(netdev_evt_only, cfg80211_send_disassoc, TP_PROTO(struct net_device *netdev), TP_ARGS(netdev) ); -- cgit v1.2.3 From 
1cdd59ce8dcfa850ebb8ac2ab000a2ea572d1d69 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2013 18:58:00 +0200 Subject: cfg80211: simplify and correct P2P-Device scan check If the driver for some reason successfully finishes scanning while in p2p_stop_device(), cfg80211 will still set it to aborted. Simplify this code using the new 'notified' value and only mark it aborted in case the driver didn't notify cfg80211 at all (in which case we also leak the request to not crash, this is a driver bug.) Signed-off-by: Johannes Berg --- net/wireless/core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 5fc642d4071..afcb9ec70ad 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -204,18 +204,15 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - bool busy = work_busy(&rdev->scan_done_wk); - /* - * If the work isn't pending or running (in which case it would - * be waiting for the lock we hold) the driver didn't properly - * cancel the scan when the interface was removed. In this case - * warn and leak the scan request object to not crash later. + * If the scan request wasn't notified as done, set it + * to aborted and leak it after a warning. The driver + * should have notified us that it ended at the latest + * during rdev_stop_p2p_device(). 
*/ - WARN_ON(!busy); - - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, !busy); + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !rdev->scan_req->notified); } } -- cgit v1.2.3 From db2424c58e5962a87888d25d29ceb0873eef6348 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2013 19:07:52 +0200 Subject: regulatory: use RCU in regulatory_hint_11d() Since it just does a quick check of the last regulatory request, the function doesn't have to hold the reg mutex but can use RCU instead. Signed-off-by: Johannes Berg --- net/wireless/reg.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e7655961858..17e5eccb42c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1715,20 +1715,18 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, { char alpha2[2]; enum environment_cap env = ENVIRON_ANY; - struct regulatory_request *request, *lr; - - mutex_lock(&reg_mutex); - lr = get_last_request(); - - if (unlikely(!lr)) - goto out; + struct regulatory_request *request = NULL, *lr; /* IE len must be evenly divisible by 2 */ if (country_ie_len & 0x01) - goto out; + return; if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) - goto out; + return; + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (!request) + return; alpha2[0] = country_ie[0]; alpha2[1] = country_ie[1]; @@ -1738,6 +1736,12 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, else if (country_ie[2] == 'O') env = ENVIRON_OUTDOOR; + rcu_read_lock(); + lr = get_last_request(); + + if (unlikely(!lr)) + goto out; + /* * We will run this only upon a successful connection on cfg80211.
* We leave conflict resolution to the workqueue, where can hold @@ -1747,10 +1751,6 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, lr->wiphy_idx != WIPHY_IDX_INVALID) goto out; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); - if (!request) - goto out; - request->wiphy_idx = get_wiphy_idx(wiphy); request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; @@ -1758,8 +1758,10 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, request->country_ie_env = env; queue_regulatory_request(request); + request = NULL; out: - mutex_unlock(&reg_mutex); + kfree(request); + rcu_read_unlock(); } static void restore_alpha2(char *alpha2, bool reset_user) -- cgit v1.2.3 From 38fd2143fa653f80729800c1d61d4207b91dca42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 May 2013 19:17:17 +0200 Subject: regulatory: remove reg_mutex The reg_mutex is similar to the ones I just removed in cfg80211 but even less useful since it protects global data, and we hold the RTNL in all places (except module unload) already.
Signed-off-by: Johannes Berg --- net/wireless/reg.c | 76 ++++++++++++++---------------------------------------- 1 file changed, 19 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 17e5eccb42c..e1d6749234c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -81,7 +81,10 @@ static struct regulatory_request core_request_world = { .country_ie_env = ENVIRON_ANY, }; -/* Receipt of information from last regulatory request */ +/* + * Receipt of information from last regulatory request, + * protected by RTNL (and can be accessed with RCU protection) + */ static struct regulatory_request __rcu *last_request = (void __rcu *)&core_request_world; @@ -96,39 +99,25 @@ static struct device_type reg_device_type = { * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no * information to give us an alpha2. + * (protected by RTNL, can be read under RCU) */ const struct ieee80211_regdomain __rcu *cfg80211_regdomain; -/* - * Protects static reg.c components: - * - cfg80211_regdomain (if not used with RCU) - * - cfg80211_world_regdom - * - last_request (if not used with RCU) - * - reg_num_devs_support_basehint - */ -static DEFINE_MUTEX(reg_mutex); - /* * Number of devices that registered to the core * that support cellular base station regulatory hints + * (protected by RTNL) */ static int reg_num_devs_support_basehint; -static inline void assert_reg_lock(void) -{ - lockdep_assert_held(®_mutex); -} - static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { - return rcu_dereference_protected(cfg80211_regdomain, - lockdep_is_held(®_mutex)); + return rtnl_dereference(cfg80211_regdomain); } static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { - return rcu_dereference_protected(wiphy->regd, - lockdep_is_held(®_mutex)); + return rtnl_dereference(wiphy->regd); } static void rcu_free_regdom(const struct 
ieee80211_regdomain *r) @@ -140,8 +129,7 @@ static void rcu_free_regdom(const struct ieee80211_regdomain *r) static struct regulatory_request *get_last_request(void) { - return rcu_dereference_check(last_request, - lockdep_is_held(®_mutex)); + return rcu_dereference_rtnl(last_request); } /* Used to queue up regulatory hints */ @@ -200,6 +188,7 @@ static const struct ieee80211_regdomain world_regdom = { } }; +/* protected by RTNL */ static const struct ieee80211_regdomain *cfg80211_world_regdom = &world_regdom; @@ -215,7 +204,7 @@ static void reset_regdomains(bool full_reset, const struct ieee80211_regdomain *r; struct regulatory_request *lr; - assert_reg_lock(); + ASSERT_RTNL(); r = get_cfg80211_regdom(); @@ -936,13 +925,7 @@ static bool reg_request_cell_base(struct regulatory_request *request) bool reg_last_request_cell_base(void) { - bool val; - - mutex_lock(®_mutex); - val = reg_request_cell_base(get_last_request()); - mutex_unlock(®_mutex); - - return val; + return reg_request_cell_base(get_last_request()); } #ifdef CONFIG_CFG80211_CERTIFICATION_ONUS @@ -1444,8 +1427,6 @@ static void reg_set_request_processed(void) * what it believes should be the current regulatory domain. * * Returns one of the different reg request treatment values. 
- * - * Caller must hold ®_mutex */ static enum reg_request_treatment __regulatory_hint(struct wiphy *wiphy, @@ -1622,10 +1603,8 @@ static void reg_process_pending_beacon_hints(void) static void reg_todo(struct work_struct *work) { rtnl_lock(); - mutex_lock(®_mutex); reg_process_pending_hints(); reg_process_pending_beacon_hints(); - mutex_unlock(®_mutex); rtnl_unlock(); } @@ -1851,8 +1830,6 @@ static void restore_regulatory_settings(bool reset_user) ASSERT_RTNL(); - mutex_lock(®_mutex); - reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -1906,8 +1883,6 @@ static void restore_regulatory_settings(bool reset_user) list_splice_tail_init(&tmp_reg_req_list, ®_requests_list); spin_unlock(®_requests_lock); - mutex_unlock(®_mutex); - REG_DBG_PRINT("Kicking the queue\n"); schedule_work(®_work); @@ -2222,7 +2197,6 @@ int set_regdom(const struct ieee80211_regdomain *rd) struct regulatory_request *lr; int r; - mutex_lock(®_mutex); lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ @@ -2232,14 +2206,12 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); kfree(rd); - goto out; + return r; } /* This would make this whole thing pointless */ - if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { - r = -EINVAL; - goto out; - } + if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) + return -EINVAL; /* update all wiphys now with the new established regulatory domain */ update_all_wiphy_regulatory(lr->initiator); @@ -2250,10 +2222,7 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); - out: - mutex_unlock(®_mutex); - - return r; + return 0; } int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) @@ -2278,14 +2247,10 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) void wiphy_regulatory_register(struct wiphy *wiphy) { - mutex_lock(®_mutex); - if (!reg_dev_ignore_cell_hint(wiphy)) 
reg_num_devs_support_basehint++; wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); - - mutex_unlock(®_mutex); } void wiphy_regulatory_deregister(struct wiphy *wiphy) @@ -2293,7 +2258,6 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) struct wiphy *request_wiphy = NULL; struct regulatory_request *lr; - mutex_lock(®_mutex); lr = get_last_request(); if (!reg_dev_ignore_cell_hint(wiphy)) @@ -2306,12 +2270,10 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy || request_wiphy != wiphy) - goto out; + return; lr->wiphy_idx = WIPHY_IDX_INVALID; lr->country_ie_env = ENVIRON_ANY; -out: - mutex_unlock(®_mutex); } static void reg_timeout_work(struct work_struct *work) @@ -2375,9 +2337,9 @@ void regulatory_exit(void) cancel_delayed_work_sync(®_timeout); /* Lock to suppress warnings */ - mutex_lock(®_mutex); + rtnl_lock(); reset_regdomains(true, NULL); - mutex_unlock(®_mutex); + rtnl_unlock(); dev_set_uevent_suppress(®_pdev->dev, true); -- cgit v1.2.3 From 91bf9b26fc95c505846bc2f744a73b51f2aaee1d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2013 17:44:01 +0200 Subject: cfg80211: remove some locked wrappers from mlme API By making all the API functions require wdev locking we can clean up the API a bit, getting rid of the locking version of each function. This also decreases the size of cfg80211 by a small amount. 
Signed-off-by: Johannes Berg --- net/wireless/core.h | 25 ++--------- net/wireless/mlme.c | 120 ++++++++++--------------------------------------- net/wireless/nl80211.c | 34 +++++++++----- net/wireless/sme.c | 40 ++++++++--------- 4 files changed, 70 insertions(+), 149 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index d21a0fc0140..c8f87dfd50d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -282,38 +282,21 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev); /* MLME */ -int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, - const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len); int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, const u8 *sae_data, int sae_data_len); -int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *ssid, int ssid_len, struct cfg80211_assoc_request *req); -int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct 
net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 80ffb013891..7bde5d9c000 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -221,15 +221,15 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, EXPORT_SYMBOL(cfg80211_michael_mic_failure); /* some MLME handling for userspace SME */ -int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, - const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) +int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx, + const u8 *sae_data, int sae_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req = { @@ -271,28 +271,6 @@ out: return err; } -int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) -{ - int err; - - ASSERT_RTNL(); - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len, - key, key_len, key_idx, - sae_data, sae_data_len); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - /* Do a logical ht_capa &= ht_capa_mask. 
*/ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask) @@ -327,12 +305,12 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, p1[i] &= p2[i]; } -int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req) +int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, + const u8 *ssid, int ssid_len, + struct cfg80211_assoc_request *req) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -382,30 +360,10 @@ out: return err; } -int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - ASSERT_RTNL(); - - wdev_lock(wdev); - err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, - ssid, ssid_len, req); - wdev_unlock(wdev); - - return err; -} - -int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) +int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_deauth_request req = { @@ -425,26 +383,10 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, return rdev_deauth(rdev, dev, &req); } -int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - 
int err; - - wdev_lock(wdev); - err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason, - local_state_change); - wdev_unlock(wdev); - - return err; -} - -static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) +int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_disassoc_request req = { @@ -470,22 +412,6 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, return rdev_disassoc(rdev, dev, &req); } -int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason, - local_state_change); - wdev_unlock(wdev); - - return err; -} - void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 74cdb1a0cf3..49c2f2f511d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5904,10 +5904,13 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (local_state_change) return 0; - return cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len, - key.p.key, key.p.key_len, key.idx, - sae_data, sae_data_len); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, + ssid, ssid_len, ie, ie_len, + key.p.key, key.p.key_len, key.idx, + sae_data, sae_data_len); + wdev_unlock(dev->ieee80211_ptr); + return err; } static int nl80211_crypto_settings(struct 
cfg80211_registered_device *rdev, @@ -6074,9 +6077,12 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); - if (!err) + if (!err) { + wdev_lock(dev->ieee80211_ptr); err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, ssid_len, &req); + wdev_unlock(dev->ieee80211_ptr); + } return err; } @@ -6086,7 +6092,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0; + int ie_len = 0, err; u16 reason_code; bool local_state_change; @@ -6121,8 +6127,11 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); + wdev_unlock(dev->ieee80211_ptr); + return err; } static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) @@ -6130,7 +6139,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0; + int ie_len = 0, err; u16 reason_code; bool local_state_change; @@ -6165,8 +6174,11 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); + wdev_unlock(dev->ieee80211_ptr); + return err; } static bool diff 
--git a/net/wireless/sme.c b/net/wireless/sme.c index 4dbf31407a5..31d67add742 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -169,13 +169,13 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_AUTHENTICATE_NEXT: BUG_ON(!rdev->ops->auth); wdev->conn->state = CFG80211_CONN_AUTHENTICATING; - return __cfg80211_mlme_auth(rdev, wdev->netdev, - params->channel, params->auth_type, - params->bssid, - params->ssid, params->ssid_len, - NULL, 0, - params->key, params->key_len, - params->key_idx, NULL, 0); + return cfg80211_mlme_auth(rdev, wdev->netdev, + params->channel, params->auth_type, + params->bssid, + params->ssid, params->ssid_len, + NULL, 0, + params->key, params->key_len, + params->key_idx, NULL, 0); case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -191,19 +191,19 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) req.vht_capa = params->vht_capa; req.vht_capa_mask = params->vht_capa_mask; - err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, - params->bssid, params->ssid, - params->ssid_len, &req); + err = cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, + params->bssid, params->ssid, + params->ssid_len, &req); if (err) - __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, - NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, - false); + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, + false); return err; case CFG80211_CONN_DEAUTH_ASSOC_FAIL: - __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, - NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); /* return an error so that we call __cfg80211_connect_result() */ return -EINVAL; default: @@ -961,7 +961,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, } /* wdev->conn->params.bssid must be set if > SCANNING */ 
- err = __cfg80211_mlme_deauth(rdev, dev, + err = cfg80211_mlme_deauth(rdev, dev, wdev->conn->params.bssid, NULL, 0, reason, false); if (err) @@ -1018,6 +1018,6 @@ void cfg80211_sme_disassoc(struct net_device *dev, memcpy(bssid, bss->pub.bssid, ETH_ALEN); - __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); } -- cgit v1.2.3 From 83739b03de97049181d711c95200b94a14d3f693 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2013 17:44:01 +0200 Subject: cfg80211: remove some locked wrappers from sme API By making all the API functions require wdev locking we can clean up the API a bit, getting rid of the locking version of each function. This also decreases the size of cfg80211 by a small amount. Signed-off-by: Johannes Berg --- net/wireless/core.c | 4 ++-- net/wireless/core.h | 11 ++--------- net/wireless/nl80211.c | 10 ++++++++-- net/wireless/sme.c | 41 +++++++---------------------------------- net/wireless/util.c | 2 ++ net/wireless/wext-sme.c | 23 +++++++++++------------ 6 files changed, 32 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index afcb9ec70ad..ee422871fe9 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -755,8 +755,8 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, wdev->wext.ie_len = 0; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif - __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, true); + cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, true); wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: diff --git a/net/wireless/core.h b/net/wireless/core.h index c8f87dfd50d..b4b4a566626 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -328,18 +328,11 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); /* SME */ 
-int __cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys, - const u8 *prev_bssid); int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys); -int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, - bool wextev); + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 49c2f2f511d..a09f36bb957 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6683,7 +6683,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.vht_capa)); } - err = cfg80211_connect(rdev, dev, &connect, connkeys); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL); + wdev_unlock(dev->ieee80211_ptr); if (err) kfree(connkeys); return err; @@ -6694,6 +6696,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u16 reason; + int ret; if (!info->attrs[NL80211_ATTR_REASON_CODE]) reason = WLAN_REASON_DEAUTH_LEAVING; @@ -6707,7 +6710,10 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - return cfg80211_disconnect(rdev, dev, reason, true); + wdev_lock(dev->ieee80211_ptr); + ret = cfg80211_disconnect(rdev, dev, reason, true); + wdev_unlock(dev->ieee80211_ptr); + return ret; } static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 
31d67add742..81be95f3be7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -760,11 +760,11 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, } EXPORT_SYMBOL(cfg80211_disconnected); -int __cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys, - const u8 *prev_bssid) +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss = NULL; @@ -911,22 +911,8 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, } } -int cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys) -{ - int err; - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - -int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, bool wextev) +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -983,19 +969,6 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, return 0; } -int cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - u16 reason, bool wextev) -{ - int err; - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_disconnect(rdev, dev, reason, wextev); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - void cfg80211_sme_disassoc(struct net_device *dev, struct cfg80211_internal_bss *bss) { diff --git a/net/wireless/util.c b/net/wireless/util.c index 501724257af..74458b7f61e 100644 --- 
a/net/wireless/util.c +++ b/net/wireless/util.c @@ -858,8 +858,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: + wdev_lock(dev->ieee80211_ptr); cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); + wdev_unlock(dev->ieee80211_ptr); break; case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? */ diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index aeefd681718..a53f8404f45 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -54,8 +54,8 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (wdev->wext.prev_bssid_valid) prev_bssid = wdev->wext.prev_bssid; - err = __cfg80211_connect(rdev, wdev->netdev, - &wdev->wext.connect, ck, prev_bssid); + err = cfg80211_connect(rdev, wdev->netdev, + &wdev->wext.connect, ck, prev_bssid); if (err) kfree(ck); @@ -100,8 +100,8 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, /* if SSID set, we'll try right again, avoid event */ if (wdev->wext.connect.ssid_len) event = false; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, event); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, event); if (err) goto out; } @@ -199,8 +199,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, /* if SSID set now, we'll try to connect, avoid event */ if (len) event = false; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, event); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, event); if (err) goto out; } @@ -288,8 +288,8 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, ether_addr_equal(bssid, wdev->wext.connect.bssid)) goto out; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) goto out; } @@ -365,8 +365,8 @@ int cfg80211_wext_siwgenie(struct net_device *dev, 
wdev->wext.ie_len = ie_len; if (wdev->sme_state != CFG80211_SME_IDLE) { - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) goto out; } @@ -402,8 +402,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: - err = __cfg80211_disconnect(rdev, dev, mlme->reason_code, - true); + err = cfg80211_disconnect(rdev, dev, mlme->reason_code, true); break; default: err = -EOPNOTSUPP; -- cgit v1.2.3 From 079956742452494326081349a66942654498cafa Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Mon, 29 Apr 2013 11:55:10 -0700 Subject: ipvs: change type of netns_ipvs->sysctl_sync_qlen_max This member of struct netns_ipvs is calculated from nr_free_buffer_pages so change its type to unsigned long in case of overflow. Also, type of its related proc var sync_qlen_max and the return type of function sysctl_sync_qlen_max() should be changed to unsigned long, too. Besides, the type of ipvs_master_sync_state->sync_queue_len should be changed to unsigned long accordingly. 
Signed-off-by: Zhang Yanfei Cc: Julian Anastasov Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5b142fb1648..70146496e73 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1716,9 +1716,9 @@ static struct ctl_table vs_vars[] = { }, { .procname = "sync_qlen_max", - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "sync_sock_size", -- cgit v1.2.3 From 6d0bfe22611602f36617bc7aa2ffa1bbb2f54c67 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 22 May 2013 20:17:31 +0000 Subject: net: ipv6: Add IPv6 support to the ping socket. This adds the ability to send ICMPv6 echo requests without a raw socket. The equivalent ability for ICMPv4 was added in 2011. Instead of having separate code paths for IPv4 and IPv6, make most of the code in net/ipv4/ping.c dual-stack and only add a few IPv6-specific bits (like the protocol definition) to a new net/ipv6/ping.c. Hopefully this will reduce divergence and/or duplication of bugs in the future. Caveats: - Setting options via ancillary data (e.g., using IPV6_PKTINFO to specify the outgoing interface) is not yet supported. - There are no separate security settings for IPv4 and IPv6; everything is controlled by /proc/net/ipv4/ping_group_range. - The proc interface does not yet display IPv6 ping sockets properly. Tested with a patched copy of ping6 and using raw socket calls. Compiles and works with all of CONFIG_IPV6={n,m,y}. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- net/ipv4/icmp.c | 5 +- net/ipv4/ping.c | 557 +++++++++++++++++++++++++++++++++++++--------------- net/ipv6/Makefile | 2 +- net/ipv6/af_inet6.c | 12 ++ net/ipv6/icmp.c | 19 +- net/ipv6/ping.c | 216 ++++++++++++++++++++ 6 files changed, 643 insertions(+), 168 deletions(-) create mode 100644 net/ipv6/ping.c (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 76e10b47e05..562efd91f45 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -939,7 +939,8 @@ error: void icmp_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int offset = iph->ihl<<2; + struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); @@ -949,7 +950,7 @@ void icmp_err(struct sk_buff *skb, u32 info) * triggered by ICMP_ECHOREPLY which sent from kernel. */ if (icmph->type != ICMP_ECHOREPLY) { - ping_err(skb, info); + ping_err(skb, offset, info); return; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7d93d62cd5f..71f6ad02fa6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -46,8 +45,18 @@ #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#include +#include +#endif -static struct ping_table ping_table; + +struct ping_table ping_table; +struct pingv6_ops pingv6_ops; +EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; @@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma pr_debug("hash(%d) = %d\n", num, res); return res; } +EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned int num) @@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, return 
&table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } -static int ping_v4_get_port(struct sock *sk, unsigned short ident) +int ping_get_port(struct sock *sk, unsigned short ident) { struct hlist_nulls_node *node; struct hlist_nulls_head *hlist; @@ -103,6 +113,10 @@ next_port: ping_portaddr_for_each_entry(sk2, node, hlist) { isk2 = inet_sk(sk2); + /* BUG? Why is this reuse and not reuseaddr? ping.c + * doesn't turn off SO_REUSEADDR, and it doesn't expect + * that other ping processes can steal its packets. + */ if ((isk2->inet_num == ident) && (sk2 != sk) && (!sk2->sk_reuse || !sk->sk_reuse)) @@ -125,17 +139,18 @@ fail: write_unlock_bh(&ping_table.lock); return 1; } +EXPORT_SYMBOL_GPL(ping_get_port); -static void ping_v4_hash(struct sock *sk) +void ping_hash(struct sock *sk) { - pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); + pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ } -static void ping_v4_unhash(struct sock *sk) +void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); - pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); @@ -146,31 +161,61 @@ static void ping_v4_unhash(struct sock *sk) write_unlock_bh(&ping_table.lock); } } +EXPORT_SYMBOL_GPL(ping_unhash); -static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, - u16 ident, int dif) +static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; + int dif = skb->dev->ifindex; + + if (skb->protocol == htons(ETH_P_IP)) { + pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", + (int)ident, &ip_hdr(skb)->daddr, dif); +#if 
IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", + (int)ident, &ipv6_hdr(skb)->daddr, dif); +#endif + } - pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", - (int)ident, &daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); - pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk, - (int)isk->inet_num, &isk->inet_rcv_saddr, - sk->sk_bound_dev_if); - pr_debug("iterate\n"); if (isk->inet_num != ident) continue; - if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr) - continue; + + if (skb->protocol == htons(ETH_P_IP) && + sk->sk_family == AF_INET) { + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, + (int) isk->inet_num, &isk->inet_rcv_saddr, + sk->sk_bound_dev_if); + + if (isk->inet_rcv_saddr && + isk->inet_rcv_saddr != ip_hdr(skb)->daddr) + continue; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6) && + sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, + (int) isk->inet_num, + &inet6_sk(sk)->rcv_saddr, + sk->sk_bound_dev_if); + + if (!ipv6_addr_any(&np->rcv_saddr) && + !ipv6_addr_equal(&np->rcv_saddr, + &ipv6_hdr(skb)->daddr)) + continue; +#endif + } + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; @@ -200,7 +245,7 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, } -static int ping_init_sock(struct sock *sk) +int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); @@ -225,8 +270,9 @@ static int ping_init_sock(struct sock *sk) return -EACCES; } +EXPORT_SYMBOL_GPL(ping_init_sock); -static void ping_close(struct sock *sk, long timeout) +void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); @@ -234,36 +280,122 @@ static void 
ping_close(struct sock *sk, long timeout) sk_common_release(sk); } +EXPORT_SYMBOL_GPL(ping_close); + +/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ +int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { + struct net *net = sock_net(sk); + if (sk->sk_family == AF_INET) { + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + int chk_addr_ret; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", + sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); + + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) + chk_addr_ret = RTN_LOCAL; + + if ((sysctl_ip_nonlocal_bind == 0 && + isk->freebind == 0 && isk->transparent == 0 && + chk_addr_ret != RTN_LOCAL) || + chk_addr_ret == RTN_MULTICAST || + chk_addr_ret == RTN_BROADCAST) + return -EADDRNOTAVAIL; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; + int addr_type, scoped, has_addr; + struct net_device *dev = NULL; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", + sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); + + addr_type = ipv6_addr_type(&addr->sin6_addr); + scoped = __ipv6_addr_needs_scope_id(addr_type); + if ((addr_type != IPV6_ADDR_ANY && + !(addr_type & IPV6_ADDR_UNICAST)) || + (scoped && !addr->sin6_scope_id)) + return -EINVAL; + + rcu_read_lock(); + if (addr->sin6_scope_id) { + dev = dev_get_by_index_rcu(net, addr->sin6_scope_id); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } + has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, + scoped); + rcu_read_unlock(); + + if (!(isk->freebind || isk->transparent || has_addr || + addr_type == IPV6_ADDR_ANY)) + return -EADDRNOTAVAIL; + + if (scoped) + sk->sk_bound_dev_if = 
addr->sin6_scope_id; +#endif + } else { + return -EAFNOSUPPORT; + } + return 0; +} +void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +{ + if (saddr->sa_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) saddr; + isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (saddr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; + struct ipv6_pinfo *np = inet6_sk(sk); + np->rcv_saddr = np->saddr = addr->sin6_addr; +#endif + } +} + +void ping_clear_saddr(struct sock *sk, int dif) +{ + sk->sk_bound_dev_if = dif; + if (sk->sk_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + isk->inet_rcv_saddr = isk->inet_saddr = 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&np->saddr, 0, sizeof(np->saddr)); +#endif + } +} /* * We need our own bind because there are no privileged id's == local ports. * Moreover, we don't allow binding to multi- and broadcast addresses. 
*/ -static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *isk = inet_sk(sk); unsigned short snum; - int chk_addr_ret; int err; + int dif = sk->sk_bound_dev_if; - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n", - sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); - - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) - chk_addr_ret = RTN_LOCAL; - - if ((sysctl_ip_nonlocal_bind == 0 && - isk->freebind == 0 && isk->transparent == 0 && - chk_addr_ret != RTN_LOCAL) || - chk_addr_ret == RTN_MULTICAST || - chk_addr_ret == RTN_BROADCAST) - return -EADDRNOTAVAIL; + err = ping_check_bind_addr(sk, isk, uaddr, addr_len); + if (err) + return err; lock_sock(sk); @@ -272,42 +404,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; err = -EADDRINUSE; - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; - snum = ntohs(addr->sin_port); - if (ping_v4_get_port(sk, snum) != 0) { - isk->inet_saddr = isk->inet_rcv_saddr = 0; + ping_set_saddr(sk, uaddr); + snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port); + if (ping_get_port(sk, snum) != 0) { + ping_clear_saddr(sk, dif); goto out; } - pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n", + pr_debug("after bind(): num = %d, dif = %d\n", (int)isk->inet_num, - &isk->inet_rcv_saddr, (int)sk->sk_bound_dev_if); err = 0; - if (isk->inet_rcv_saddr) + if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || + (sk->sk_family == AF_INET6 && + !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; isk->inet_sport = htons(isk->inet_num); isk->inet_daddr = 0; isk->inet_dport = 0; + +#if 
IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); +#endif + sk_dst_reset(sk); out: release_sock(sk); pr_debug("ping_v4_bind -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_bind); /* * Is this a supported type of ICMP message? */ -static inline int ping_supported(int type, int code) +static inline int ping_supported(int family, int type, int code) { - if (type == ICMP_ECHO && code == 0) - return 1; - return 0; + return (family == AF_INET && type == ICMP_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); } /* @@ -315,30 +455,42 @@ static inline int ping_supported(int type, int code) * sort of error condition. */ -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); - -void ping_err(struct sk_buff *skb, u32 info) +void ping_err(struct sk_buff *skb, int offset, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int family; + struct icmphdr *icmph; struct inet_sock *inet_sock; - int type = icmp_hdr(skb)->type; - int code = icmp_hdr(skb)->code; + int type; + int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; int err; + if (skb->protocol == htons(ETH_P_IP)) { + family = AF_INET; + type = icmp_hdr(skb)->type; + code = icmp_hdr(skb)->code; + icmph = (struct icmphdr *)(skb->data + offset); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + family = AF_INET6; + type = icmp6_hdr(skb)->icmp6_type; + code = icmp6_hdr(skb)->icmp6_code; + icmph = (struct icmphdr *) (skb->data + offset); + } else { + BUG(); + } + /* We assume the packet has already been checked by icmp_unreach */ - if (!ping_supported(icmph->type, icmph->code)) + if (!ping_supported(family, icmph->type, icmph->code)) return; - pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type, - code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + 
pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n", + skb->protocol, type, code, ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); - sk = ping_v4_lookup(net, iph->daddr, iph->saddr, - ntohs(icmph->un.echo.id), skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk == NULL) { pr_debug("no socket, dropping\n"); return; /* No socket for error */ @@ -349,72 +501,83 @@ void ping_err(struct sk_buff *skb, u32 info) harderr = 0; inet_sock = inet_sk(sk); - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - /* This is not a real error but ping wants to see it. - * Report it with some fake errno. */ - err = EREMOTEIO; - break; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - ipv4_sk_update_pmtu(skb, sk, info); - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; + if (skb->protocol == htons(ETH_P_IP)) { + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + /* This is not a real error but ping wants to see it. + * Report it with some fake errno. 
+ */ + err = EREMOTEIO; + break; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); + if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + case ICMP_REDIRECT: + /* See ICMP_SOURCE_QUENCH */ + ipv4_sk_redirect(skb, sk); + err = EREMOTEIO; + break; } - break; - case ICMP_REDIRECT: - /* See ICMP_SOURCE_QUENCH */ - ipv4_sk_redirect(skb, sk); - err = EREMOTEIO; - break; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + harderr = pingv6_ops.icmpv6_err_convert(type, code, &err); +#endif } /* * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ - if (!inet_sock->recverr) { + if ((family == AF_INET && !inet_sock->recverr) || + (family == AF_INET6 && !inet6_sk(sk)->recverr)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { - ip_icmp_error(sk, skb, err, 0 /* no remote port */, - info, (u8 *)icmph); + if (family == AF_INET) { + ip_icmp_error(sk, skb, err, 0 /* no remote port */, + info, (u8 *)icmph); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + pingv6_ops.ipv6_icmp_error(sk, skb, err, 0, + info, (u8 *)icmph); +#endif + } } sk->sk_err = err; sk->sk_error_report(sk); out: sock_put(sk); } +EXPORT_SYMBOL_GPL(ping_err); /* - * Copy and checksum an ICMP Echo packet from user space into a buffer. + * Copy and checksum an ICMP Echo packet from user space into a buffer + * starting from the payload. 
*/ -struct pingfakehdr { - struct icmphdr icmph; - struct iovec *iov; - __wsum wcheck; -}; - -static int ping_getfrag(void *from, char *to, - int offset, int fraglen, int odd, struct sk_buff *skb) +int ping_getfrag(void *from, char *to, + int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = (struct pingfakehdr *)from; @@ -425,20 +588,33 @@ static int ping_getfrag(void *from, char *to, pfh->iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; + } else if (offset < sizeof(struct icmphdr)) { + BUG(); + } else { + if (csum_partial_copy_fromiovecend + (to, pfh->iov, offset - sizeof(struct icmphdr), + fraglen, &pfh->wcheck)) + return -EFAULT; + } - return 0; +#if IS_ENABLED(CONFIG_IPV6) + /* For IPv6, checksum each skb as we go along, as expected by + * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in + * wcheck, it will be finalized in ping_v4_push_pending_frames. + */ + if (pfh->family == AF_INET6) { + skb->csum = pfh->wcheck; + skb->ip_summed = CHECKSUM_NONE; + pfh->wcheck = 0; } - if (offset < sizeof(struct icmphdr)) - BUG(); - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) - return -EFAULT; +#endif + return 0; } +EXPORT_SYMBOL_GPL(ping_getfrag); -static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, - struct flowi4 *fl4) +static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, + struct flowi4 *fl4) { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); @@ -450,24 +626,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, return ip_push_pending_frames(sk, fl4); } -static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct net *net = sock_net(sk); - struct flowi4 fl4; - struct inet_sock *inet = inet_sk(sk); - struct ipcm_cookie ipc; - struct icmphdr user_icmph; - struct pingfakehdr pfh; - struct rtable *rt = NULL; - 
struct ip_options_data opt_copy; - int free = 0; - __be32 saddr, daddr, faddr; - u8 tos; - int err; - - pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); - +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len) { + u8 type, code; if (len > 0xFFFF) return -EMSGSIZE; @@ -482,15 +643,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* * Fetch the ICMP header provided by the userland. - * iovec is modified! + * iovec is modified! The ICMP header is consumed. */ - - if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov, - sizeof(struct icmphdr))) + if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len)) return -EFAULT; - if (!ping_supported(user_icmph.type, user_icmph.code)) + + if (family == AF_INET) { + type = ((struct icmphdr *) user_icmph)->type; + code = ((struct icmphdr *) user_icmph)->code; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + type = ((struct icmp6hdr *) user_icmph)->icmp6_type; + code = ((struct icmp6hdr *) user_icmph)->icmp6_code; +#endif + } else { + BUG(); + } + + if (!ping_supported(family, type, code)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(ping_common_sendmsg); + +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct net *net = sock_net(sk); + struct flowi4 fl4; + struct inet_sock *inet = inet_sk(sk); + struct ipcm_cookie ipc; + struct icmphdr user_icmph; + struct pingfakehdr pfh; + struct rtable *rt = NULL; + struct ip_options_data opt_copy; + int free = 0; + __be32 saddr, daddr, faddr; + u8 tos; + int err; + + pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + /* * Get and verify the address. 
*/ @@ -595,13 +794,14 @@ back_from_confirm: pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; pfh.iov = msg->msg_iov; pfh.wcheck = 0; + pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else - err = ping_push_pending_frames(sk, &pfh, &fl4); + err = ping_v4_push_pending_frames(sk, &pfh, &fl4); release_sock(sk); out: @@ -622,11 +822,13 @@ do_confirm: goto out; } -static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + int family = sk->sk_family; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct sk_buff *skb; int copied, err; @@ -636,11 +838,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); + if (addr_len) { + if (family == AF_INET) + *addr_len = sizeof(*sin); + else if (family == AF_INET6 && addr_len) + *addr_len = sizeof(*sin6); + } - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { + return ip_recv_error(sk, msg, len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + return pingv6_ops.ipv6_recv_error(sk, msg, len); +#endif + } + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -659,15 +872,40 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); - /* Copy the address. */ - if (sin) { + /* Copy the address and add cmsg data. 
*/ + if (family == AF_INET) { + sin = (struct sockaddr_in *) msg->msg_name; sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); + +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *ip6 = ipv6_hdr(skb); + sin6 = (struct sockaddr_in6 *) msg->msg_name; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; + + if (np->sndflow) + sin6->sin6_flowinfo = ip6_flowinfo(ip6); + + if (__ipv6_addr_needs_scope_id( + ipv6_addr_type(&sin6->sin6_addr))) + sin6->sin6_scope_id = IP6CB(skb)->iif; + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); +#endif + } else { + BUG(); } - if (isk->cmsg_flags) - ip_cmsg_recv(msg, skb); + err = copied; done: @@ -676,8 +914,9 @@ out: pr_debug("ping_recvmsg -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_recvmsg); -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); @@ -688,6 +927,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); /* @@ -698,10 +938,7 @@ void ping_rcv(struct sk_buff *skb) { struct sock *sk; struct net *net = dev_net(skb->dev); - struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph = icmp_hdr(skb); - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; /* We assume the packet has already been checked by icmp_rcv */ @@ -711,8 +948,7 @@ void ping_rcv(struct sk_buff *skb) /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); - sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), - skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if 
(sk != NULL) { pr_debug("rcv on socket %p\n", sk); ping_queue_rcv_skb(sk, skb_get(skb)); @@ -723,6 +959,7 @@ void ping_rcv(struct sk_buff *skb) /* We're called from icmp_rcv(). kfree_skb() is done there. */ } +EXPORT_SYMBOL_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -733,14 +970,14 @@ struct proto ping_prot = { .disconnect = udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .sendmsg = ping_sendmsg, + .sendmsg = ping_v4_sendmsg, .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, - .hash = ping_v4_hash, - .unhash = ping_v4_unhash, - .get_port = ping_v4_get_port, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 9af088d2cda..470a9c008e9 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ab5c7ad482c..a5ac969aeef 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -840,6 +841,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_udplite_proto; + err = proto_register(&pingv6_prot, 1); + if (err) + goto out_unregister_ping_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. 
@@ -930,6 +934,10 @@ static int __init inet6_init(void) if (err) goto ipv6_packet_fail; + err = pingv6_init(); + if (err) + goto pingv6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -942,6 +950,8 @@ out: sysctl_fail: ipv6_packet_cleanup(); #endif +pingv6_fail: + pingv6_exit(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -985,6 +995,8 @@ register_pernet_fail: rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); +out_unregister_ping_proto: + proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4ff0a42b8c..1d2902e6178 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -57,6 +57,7 @@ #include #include +#include #include #include #include @@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net) static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ + struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, 0, 0); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); + + if (!(type & ICMPV6_INFOMSG_MASK)) + if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) + ping_err(skb, offset, info); } static int icmpv6_rcv(struct sk_buff *skb); @@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -static struct dst_entry 
*icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto csum_error; } @@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - /* we couldn't care less */ + ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c new file mode 100644 index 00000000000..a6462d657c1 --- /dev/null +++ b/net/ipv6/ping.c @@ -0,0 +1,216 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * "Ping" sockets + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ipv4/ping.c code. 
+ * + * Authors: Lorenzo Colitti (IPv6 support) + * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6), + * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32) + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + + +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + return -EAFNOSUPPORT; +} +int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} +int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +{ + return -EAFNOSUPPORT; +} +void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev, int strict) +{ + return 0; +} + +int __init pingv6_init(void) +{ + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This 
never gets called because it's not possible to unload the ipv6 module, + * but just in case. + */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; + inet6_unregister_protosw(&pingv6_protosw); +} + +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct icmp6hdr user_icmph; + int addr_type; + struct in6_addr *daddr; + int iif = 0; + struct flowi6 fl6; + int err; + int hlimit; + struct dst_entry *dst; + struct rt6_info *rt; + struct pingfakehdr pfh; + + pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + + if (msg->msg_name) { + struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + if (msg->msg_namelen < sizeof(struct sockaddr_in6) || + u->sin6_family != AF_INET6) { + return -EINVAL; + } + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != u->sin6_scope_id) { + return -EINVAL; + } + daddr = &(u->sin6_addr); + iif = u->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } + + if (!iif) + iif = sk->sk_bound_dev_if; + + addr_type = ipv6_addr_type(daddr); + if (__ipv6_addr_needs_scope_id(addr_type) && !iif) + return -EINVAL; + if (addr_type & IPV6_ADDR_MAPPED) + return -EINVAL; + + /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + + memset(&fl6, 0, sizeof(fl6)); + + fl6.flowi6_proto = IPPROTO_ICMPV6; + fl6.saddr = np->saddr; + fl6.daddr = *daddr; + fl6.fl6_icmp_type = user_icmph.icmp6_type; + fl6.fl6_icmp_code = user_icmph.icmp6_code; + security_sk_classify_flow(sk, 
flowi6_to_flowi(&fl6)); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); + if (!np) + return -EBADF; + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + pfh.icmph.type = user_icmph.icmp6_type; + pfh.icmph.code = user_icmph.icmp6_code; + pfh.icmph.checksum = 0; + pfh.icmph.un.echo.id = inet->inet_sport; + pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; + pfh.iov = msg->msg_iov; + pfh.wcheck = 0; + pfh.family = AF_INET6; + + if (ipv6_addr_is_multicast(&fl6.daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + + err = ip6_append_data(sk, ping_getfrag, &pfh, len, + 0, hlimit, + np->tclass, NULL, &fl6, rt, + MSG_DONTWAIT, np->dontfrag); + + if (err) { + ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *) &pfh.icmph, + len); + } + + return err; +} -- cgit v1.2.3 From 42e52bf9e3ae80fd44b21ddfcd64c54e6db2ff76 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 25 May 2013 04:12:10 +0000 Subject: net: add netnotifier event for upper device change Now when upper device is changed, event is not propagated via RT Netlink to userspace. Userspace might never know about the change. Fix this by adding upper-device-change notifier event. Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7229bc30e50..50c02ded1d6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4411,7 +4411,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, else list_add_tail_rcu(&upper->list, &dev->upper_dev_list); dev_hold(upper_dev); - + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); return 0; } @@ -4471,6 +4471,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, list_del_rcu(&upper->list); dev_put(upper_dev); kfree_rcu(upper, rcu); + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); -- cgit v1.2.3 From 1f6afc81088a1f5a472b272408730c73b72c68aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 May 2013 15:03:54 +0000 Subject: tcp: remove one indentation level in tcp_rcv_state_process() Remove one level of indentation 'introduced' in commit c3ae62af8e75 (tcp: should drop incoming frames without ACK flag set) if (true) { ... } @acceptable variable is a boolean. This patch is a pure cleanup. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 269 +++++++++++++++++++++++++-------------------------- 1 file changed, 133 insertions(+), 136 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8230cd6243a..40614257d2c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5536,6 +5536,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *req; int queued = 0; + bool acceptable; tp->rx_opt.saw_tstamp = 0; @@ -5606,157 +5607,153 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; /* step 5: check the ACK field */ - if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; - - switch (sk->sk_state) { - case TCP_SYN_RECV: - if (acceptable) { - /* Once we leave TCP_SYN_RECV, we no longer - * need req so release it. - */ - if (req) { - tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->num_retrans; - - reqsk_fastopen_remove(sk, req, false); - } else { - /* Make sure socket is routed, for - * correct metrics. - */ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_congestion_control(sk); + acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; - tcp_mtup_init(sk); - tcp_init_buffer_space(sk); - tp->copied_seq = tp->rcv_nxt; - } - smp_mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); + switch (sk->sk_state) { + case TCP_SYN_RECV: + if (acceptable) { + /* Once we leave TCP_SYN_RECV, we no longer + * need req so release it. + */ + if (req) { + tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->num_retrans; - /* Note, that this wakeup is only for marginal - * crossed SYN case. Passively open sockets - * are not waked up, because sk->sk_sleep == - * NULL and sk->sk_socket == NULL. + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for + * correct metrics. 
*/ - if (sk->sk_socket) - sk_wake_async(sk, - SOCK_WAKE_IO, POLL_OUT); - - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << - tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - - if (req) { - /* Re-arm the timer because data may - * have been sent out. This is similar - * to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more - * aggressive and retranmitting any data - * sooner based on when they were sent - * out. - */ - tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); - /* Prevent spurious tcp_cwnd_restart() on - * first data packet. + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); + + /* Note, that this wakeup is only for marginal + * crossed SYN case. Passively open sockets + * are not waked up, because sk->sk_sleep == + * NULL and sk->sk_socket == NULL. + */ + if (sk->sk_socket) + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << + tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + if (req) { + /* Re-arm the timer because data may + * have been sent out. This is similar + * to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive + * and retransmitting any data sooner based + * on when they are sent out. */ - tp->lsndtime = tcp_time_stamp; + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); - tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); - } else { - return 1; - } - break; + /* Prevent spurious tcp_cwnd_restart() on + * first data packet. 
+ */ + tp->lsndtime = tcp_time_stamp; - case TCP_FIN_WAIT1: - /* If we enter the TCP_FIN_WAIT1 state and we are a - * Fast Open socket and this is the first acceptable - * ACK we have received, this would have acknowledged - * our SYNACK so stop the SYNACK timer. + tcp_initialize_rcv_mss(sk); + tcp_fast_path_on(tp); + } else { + return 1; + } + break; + + case TCP_FIN_WAIT1: + /* If we enter the TCP_FIN_WAIT1 state and we are a + * Fast Open socket and this is the first acceptable + * ACK we have received, this would have acknowledged + * our SYNACK so stop the SYNACK timer. + */ + if (req != NULL) { + /* Return RST if ack_seq is invalid. + * Note that RFC793 only says to generate a + * DUPACK for it but for TCP Fast Open it seems + * better to treat this case like TCP_SYN_RECV + * above. */ - if (req != NULL) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) + if (!acceptable) + return 1; + /* We no longer need the request sock. */ + reqsk_fastopen_remove(sk, req, false); + tcp_rearm_rto(sk); + } + if (tp->snd_una == tp->write_seq) { + struct dst_entry *dst; + + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; + + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); + + if (!sock_flag(sk, SOCK_DEAD)) { + /* Wake up lingering close() */ + sk->sk_state_change(sk); + } else { + int tmo; + + if (tp->linger2 < 0 || + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + tcp_done(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; - /* We no longer need the request sock. 
*/ - reqsk_fastopen_remove(sk, req, false); - tcp_rearm_rto(sk); - } - if (tp->snd_una == tp->write_seq) { - struct dst_entry *dst; - - tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; - - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); - - if (!sock_flag(sk, SOCK_DEAD)) - /* Wake up lingering close() */ - sk->sk_state_change(sk); - else { - int tmo; - - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { - tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; - } + } - tmo = tcp_fin_time(sk); - if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sock_owned_by_user(sk)) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. We still can lose it now, - * if it spins in bh_lock_sock(), but it is really - * marginal case. - */ - inet_csk_reset_keepalive_timer(sk, tmo); - } else { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } + tmo = tcp_fin_time(sk); + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + } else if (th->fin || sock_owned_by_user(sk)) { + /* Bad case. We could lose such FIN otherwise. + * It is not a big problem, but it looks confusing + * and not so rare event. We still can lose it now, + * if it spins in bh_lock_sock(), but it is really + * marginal case. 
+ */ + inet_csk_reset_keepalive_timer(sk, tmo); + } else { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto discard; } } - break; + } + break; - case TCP_CLOSING: - if (tp->snd_una == tp->write_seq) { - tcp_time_wait(sk, TCP_TIME_WAIT, 0); - goto discard; - } - break; + case TCP_CLOSING: + if (tp->snd_una == tp->write_seq) { + tcp_time_wait(sk, TCP_TIME_WAIT, 0); + goto discard; + } + break; - case TCP_LAST_ACK: - if (tp->snd_una == tp->write_seq) { - tcp_update_metrics(sk); - tcp_done(sk); - goto discard; - } - break; + case TCP_LAST_ACK: + if (tp->snd_una == tp->write_seq) { + tcp_update_metrics(sk); + tcp_done(sk); + goto discard; } + break; } /* step 6: check the URG bit */ -- cgit v1.2.3 From 61eb900352ff731d990d5415ce9f04e4af6a6136 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 May 2013 18:36:13 +0000 Subject: tcp: Remove another indentation level in tcp_rcv_state_process case TCP_SYN_RECV: can have another indentation level removed by converting if (acceptable) { ...; } else { return 1; } to if (!acceptable) return 1; ...; Reflow code and comments to fit 80 columns. Another pure cleanup patch. Signed-off-by: Joe Perches Improved-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 110 ++++++++++++++++++++++++--------------------------- 1 file changed, 51 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 40614257d2c..413b480b932 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5612,70 +5612,62 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, switch (sk->sk_state) { case TCP_SYN_RECV: - if (acceptable) { - /* Once we leave TCP_SYN_RECV, we no longer - * need req so release it. - */ - if (req) { - tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->num_retrans; + if (!acceptable) + return 1; - reqsk_fastopen_remove(sk, req, false); - } else { - /* Make sure socket is routed, for - * correct metrics. 
- */ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_congestion_control(sk); + /* Once we leave TCP_SYN_RECV, we no longer need req + * so release it. + */ + if (req) { + tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->num_retrans; - tcp_mtup_init(sk); - tcp_init_buffer_space(sk); - tp->copied_seq = tp->rcv_nxt; - } - smp_mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); - - /* Note, that this wakeup is only for marginal - * crossed SYN case. Passively open sockets - * are not waked up, because sk->sk_sleep == - * NULL and sk->sk_socket == NULL. - */ - if (sk->sk_socket) - sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << - tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - - if (req) { - /* Re-arm the timer because data may - * have been sent out. This is similar - * to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more aggressive - * and retransmitting any data sooner based - * on when they are sent out. - */ - tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for correct metrics. */ + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); + + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); - /* Prevent spurious tcp_cwnd_restart() on - * first data packet. + /* Note, that this wakeup is only for marginal crossed SYN case. + * Passively open sockets are not waked up, because + * sk->sk_sleep == NULL and sk->sk_socket == NULL. 
+ */ + if (sk->sk_socket) + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + if (req) { + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. */ - tp->lsndtime = tcp_time_stamp; + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); - tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); - } else { - return 1; - } + /* Prevent spurious tcp_cwnd_restart() on first data packet */ + tp->lsndtime = tcp_time_stamp; + + tcp_initialize_rcv_mss(sk); + tcp_fast_path_on(tp); break; case TCP_FIN_WAIT1: -- cgit v1.2.3 From c48b22daa6062fff9eded311b4d6974c29b40487 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 May 2013 18:06:58 +0000 Subject: tcp: Remove 2 indentation levels in tcp_rcv_state_process case TCP_FIN_WAIT1 can also be simplified by reversing tests and adding breaks; Add braces after case and move automatic definitions. Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 76 +++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 413b480b932..9579e1a5a14 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5670,7 +5670,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_fast_path_on(tp); break; - case TCP_FIN_WAIT1: + case TCP_FIN_WAIT1: { + struct dst_entry *dst; + int tmo; + /* If we enter the TCP_FIN_WAIT1 state and we are a * Fast Open socket and this is the first acceptable * ACK we have received, this would have acknowledged @@ -5689,48 +5692,47 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, reqsk_fastopen_remove(sk, req, false); tcp_rearm_rto(sk); } - if (tp->snd_una == tp->write_seq) { - struct dst_entry *dst; + if (tp->snd_una != tp->write_seq) + break; - tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); - if (!sock_flag(sk, SOCK_DEAD)) { - /* Wake up lingering close() */ - sk->sk_state_change(sk); - } else { - int tmo; - - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { - tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; - } + if (!sock_flag(sk, SOCK_DEAD)) { + /* Wake up lingering close() */ + sk->sk_state_change(sk); + break; + } - tmo = tcp_fin_time(sk); - if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sock_owned_by_user(sk)) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. 
We still can lose it now, - * if it spins in bh_lock_sock(), but it is really - * marginal case. - */ - inet_csk_reset_keepalive_timer(sk, tmo); - } else { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } - } + if (tp->linger2 < 0 || + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + tcp_done(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + return 1; + } + + tmo = tcp_fin_time(sk); + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + } else if (th->fin || sock_owned_by_user(sk)) { + /* Bad case. We could lose such FIN otherwise. + * It is not a big problem, but it looks confusing + * and not so rare event. We still can lose it now, + * if it spins in bh_lock_sock(), but it is really + * marginal case. + */ + inet_csk_reset_keepalive_timer(sk, tmo); + } else { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto discard; } break; + } case TCP_CLOSING: if (tp->snd_una == tp->write_seq) { -- cgit v1.2.3 From 9d5242b19269432ea388d766312ed49f184f83fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 25 May 2013 01:46:10 +0000 Subject: netfilter: nfnetlink_queue: avoid peer_portid test The portid is set to NETLINK_CB(skb).portid at create time. The run-time check will always be false. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 2e0e835baf7..cff4449f01d 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -509,10 +509,6 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, } spin_lock_bh(&queue->lock); - if (!queue->peer_portid) { - err = -EINVAL; - goto err_out_free_nskb; - } if (queue->queue_total >= queue->queue_maxlen) { if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { failopen = 1; -- cgit v1.2.3 From 4e7dba99c9e606e304f104ce4071d8b5ba93957e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 22 May 2013 14:59:10 +0200 Subject: netfilter: Implement RFC 1123 for FTP conntrack The FTP conntrack code currently only accepts the following format for the 227 response for PASV: 227 Entering Passive Mode (148,100,81,40,31,161). It doesn't accept the following format from an obscure server: 227 Data transfer will passively listen to 67,218,99,134,50,144 From RFC 1123: The format of the 227 reply to a PASV command is not well standardized. In particular, an FTP client cannot assume that the parentheses shown on page 40 of RFC-959 will be present (and in fact, Figure 3 on page 43 omits them). Therefore, a User-FTP program that interprets the PASV reply must scan the reply for the first digit of the host and port numbers. This patch adds support for the RFC 1123 clarification by: - Allowing a search filter to specify NUL as the terminator so that try_number will return successfully if the array of numbers has been filled when an unexpected character is encountered. - Using space as the separator for the 227 reply and then scanning for the first digit of the number sequence. The number sequence is parsed out using the existing try_rfc959 but with a NUL terminator. 
References: https://bugzilla.novell.com/show_bug.cgi?id=466279 References: http://bugzilla.netfilter.org/show_bug.cgi?id=574 Reported-by: Mark Post Signed-off-by: Jeff Mahoney Signed-off-by: Jiri Slaby Cc: Pablo Neira Ayuso Cc: Patrick McHardy Cc: "David S. Miller" Cc: netfilter-devel@vger.kernel.org Cc: netfilter@vger.kernel.org Cc: coreteam@netfilter.org Cc: netdev@vger.kernel.org Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 73 +++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 6b217074237..b8a0924064e 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -55,10 +55,14 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp); EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); -static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); -static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); +static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); +static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); +static int try_eprt(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, - char); + char, unsigned int *); static struct ftp_search { const char *pattern; @@ -66,7 +70,7 @@ static struct ftp_search { char skip; char term; enum nf_ct_ftp_type ftptype; - int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); + int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *); } search[IP_CT_DIR_MAX][2] = { [IP_CT_DIR_ORIGINAL] = { { @@ -90,10 +94,8 @@ static struct ftp_search { { .pattern = "227 ", .plen = sizeof("227 ") - 1, - .skip = '(', - .term = ')', .ftptype = NF_CT_FTP_PASV, - .getnum = 
try_rfc959, + .getnum = try_rfc1123, }, { .pattern = "229 ", @@ -132,8 +134,9 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], i++; else { /* Unexpected character; true if it's the - terminator and we're finished. */ - if (*data == term && i == array_size - 1) + terminator (or we don't care about one) + and we're finished. */ + if ((*data == term || !term) && i == array_size - 1) return len; pr_debug("Char %u (got %u nums) `%u' unexpected\n", @@ -148,7 +151,8 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ static int try_rfc959(const char *data, size_t dlen, - struct nf_conntrack_man *cmd, char term) + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) { int length; u_int32_t array[6]; @@ -163,6 +167,33 @@ static int try_rfc959(const char *data, size_t dlen, return length; } +/* + * From RFC 1123: + * The format of the 227 reply to a PASV command is not + * well standardized. In particular, an FTP client cannot + * assume that the parentheses shown on page 40 of RFC-959 + * will be present (and in fact, Figure 3 on page 43 omits + * them). Therefore, a User-FTP program that interprets + * the PASV reply must scan the reply for the first digit + * of the host and port numbers. 
+ */ +static int try_rfc1123(const char *data, size_t dlen, + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) +{ + int i; + for (i = 0; i < dlen; i++) + if (isdigit(data[i])) + break; + + if (i == dlen) + return 0; + + *offset += i; + + return try_rfc959(data + i, dlen - i, cmd, 0, offset); +} + /* Grab port: number up to delimiter */ static int get_port(const char *data, int start, size_t dlen, char delim, __be16 *port) @@ -191,7 +222,7 @@ static int get_port(const char *data, int start, size_t dlen, char delim, /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, - char term) + char term, unsigned int *offset) { char delim; int length; @@ -239,7 +270,8 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, /* Returns 0, or length of numbers: |||6446| */ static int try_epsv_response(const char *data, size_t dlen, - struct nf_conntrack_man *cmd, char term) + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) { char delim; @@ -261,9 +293,10 @@ static int find_pattern(const char *data, size_t dlen, unsigned int *numlen, struct nf_conntrack_man *cmd, int (*getnum)(const char *, size_t, - struct nf_conntrack_man *, char)) + struct nf_conntrack_man *, char, + unsigned int *)) { - size_t i; + size_t i = plen; pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); if (dlen == 0) @@ -293,16 +326,18 @@ static int find_pattern(const char *data, size_t dlen, pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip to the 'skip' character */ - for (i = plen; data[i] != skip; i++) - if (i == dlen - 1) return -1; + if (skip) { + for (i = plen; data[i] != skip; i++) + if (i == dlen - 1) return -1; - /* Skip over the last character */ - i++; + /* Skip over the last character */ + i++; + } pr_debug("Skipped up to `%c'!\n", skip); *numoff = i; - *numlen = getnum(data + i, dlen - i, cmd, 
term); + *numlen = getnum(data + i, dlen - i, cmd, term, numoff); if (!*numlen) return -1; -- cgit v1.2.3 From 6abb9cb99f33b20c2f32f18a3ae9cc7543e46edb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2013 09:30:07 +0200 Subject: cfg80211: make WoWLAN configuration available to drivers Make the current WoWLAN configuration available to drivers at runtime. This isn't really useful for the normal WoWLAN behaviour and accessing it can also be racy, but drivers may use it for testing the WoWLAN device behaviour while the host stays up & running to observe the device. Signed-off-by: Johannes Berg --- net/wireless/core.c | 4 +++- net/wireless/core.h | 21 ++++++++++---------- net/wireless/nl80211.c | 53 ++++++++++++++++++++++++++------------------------ net/wireless/sysfs.c | 8 ++++++-- 4 files changed, 48 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index ee422871fe9..41cec1776f4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -663,8 +663,10 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); - if (rdev->wowlan && rdev->ops->set_wakeup) +#ifdef CONFIG_PM + if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); +#endif cfg80211_rdev_free_wowlan(rdev); } EXPORT_SYMBOL(wiphy_unregister); diff --git a/net/wireless/core.h b/net/wireless/core.h index b4b4a566626..a65eaf8a84c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -74,8 +74,6 @@ struct cfg80211_registered_device { struct work_struct conn_work; struct work_struct event_work; - struct cfg80211_wowlan *wowlan; - struct delayed_work dfs_update_channels_wk; /* netlink port which started critical protocol (0 means not started) */ @@ -96,17 +94,20 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { +#ifdef 
CONFIG_PM int i; - if (!rdev->wowlan) + if (!rdev->wiphy.wowlan_config) return; - for (i = 0; i < rdev->wowlan->n_patterns; i++) - kfree(rdev->wowlan->patterns[i].mask); - kfree(rdev->wowlan->patterns); - if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) - sock_release(rdev->wowlan->tcp->sock); - kfree(rdev->wowlan->tcp); - kfree(rdev->wowlan); + for (i = 0; i < rdev->wiphy.wowlan_config->n_patterns; i++) + kfree(rdev->wiphy.wowlan_config->patterns[i].mask); + kfree(rdev->wiphy.wowlan_config->patterns); + if (rdev->wiphy.wowlan_config->tcp && + rdev->wiphy.wowlan_config->tcp->sock) + sock_release(rdev->wiphy.wowlan_config->tcp->sock); + kfree(rdev->wiphy.wowlan_config->tcp); + kfree(rdev->wiphy.wowlan_config); +#endif } extern struct workqueue_struct *cfg80211_wq; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a09f36bb957..fb6abcb359a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7489,28 +7489,29 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) static int nl80211_send_wowlan_patterns(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { + struct cfg80211_wowlan *wowlan = rdev->wiphy.wowlan_config; struct nlattr *nl_pats, *nl_pat; int i, pat_len; - if (!rdev->wowlan->n_patterns) + if (!wowlan->n_patterns) return 0; nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; - for (i = 0; i < rdev->wowlan->n_patterns; i++) { + for (i = 0; i < wowlan->n_patterns; i++) { nl_pat = nla_nest_start(msg, i + 1); if (!nl_pat) return -ENOBUFS; - pat_len = rdev->wowlan->patterns[i].pattern_len; + pat_len = wowlan->patterns[i].pattern_len; if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || + wowlan->patterns[i].mask) || nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, rdev->wowlan->patterns[i].pattern) || + pat_len, wowlan->patterns[i].pattern) || nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, - 
rdev->wowlan->patterns[i].pkt_offset)) + wowlan->patterns[i].pkt_offset)) return -ENOBUFS; nla_nest_end(msg, nl_pat); } @@ -7573,12 +7574,12 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - if (rdev->wowlan && rdev->wowlan->tcp) { + if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) { /* adjust size to have room for all the data */ - size += rdev->wowlan->tcp->tokens_size + - rdev->wowlan->tcp->payload_len + - rdev->wowlan->tcp->wake_len + - rdev->wowlan->tcp->wake_len / 8; + size += rdev->wiphy.wowlan_config->tcp->tokens_size + + rdev->wiphy.wowlan_config->tcp->payload_len + + rdev->wiphy.wowlan_config->tcp->wake_len + + rdev->wiphy.wowlan_config->tcp->wake_len / 8; } msg = nlmsg_new(size, GFP_KERNEL); @@ -7590,33 +7591,34 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto nla_put_failure; - if (rdev->wowlan) { + if (rdev->wiphy.wowlan_config) { struct nlattr *nl_wowlan; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); if (!nl_wowlan) goto nla_put_failure; - if ((rdev->wowlan->any && + if ((rdev->wiphy.wowlan_config->any && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - (rdev->wowlan->disconnect && + (rdev->wiphy.wowlan_config->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - (rdev->wowlan->magic_pkt && + (rdev->wiphy.wowlan_config->magic_pkt && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - (rdev->wowlan->gtk_rekey_failure && + (rdev->wiphy.wowlan_config->gtk_rekey_failure && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - (rdev->wowlan->eap_identity_req && + (rdev->wiphy.wowlan_config->eap_identity_req && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - (rdev->wowlan->four_way_handshake && + (rdev->wiphy.wowlan_config->four_way_handshake && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - (rdev->wowlan->rfkill_release && + 
(rdev->wiphy.wowlan_config->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; - if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + if (nl80211_send_wowlan_tcp(msg, + rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; nla_nest_end(msg, nl_wowlan); @@ -7783,7 +7785,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) struct cfg80211_wowlan *ntrig; struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan; int err, i; - bool prev_enabled = rdev->wowlan; + bool prev_enabled = rdev->wiphy.wowlan_config; if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && !rdev->wiphy.wowlan.tcp) @@ -7791,7 +7793,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { cfg80211_rdev_free_wowlan(rdev); - rdev->wowlan = NULL; + rdev->wiphy.wowlan_config = NULL; goto set_wakeup; } @@ -7927,11 +7929,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; } cfg80211_rdev_free_wowlan(rdev); - rdev->wowlan = ntrig; + rdev->wiphy.wowlan_config = ntrig; set_wakeup: - if (rdev->ops->set_wakeup && prev_enabled != !!rdev->wowlan) - rdev_set_wakeup(rdev, rdev->wowlan); + if (rdev->ops->set_wakeup && + prev_enabled != !!rdev->wiphy.wowlan_config) + rdev_set_wakeup(rdev, rdev->wiphy.wowlan_config); return 0; error: diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 8f28b9f798d..360a42c6f69 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -91,6 +91,7 @@ static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) cfg80211_leave(rdev, wdev); } +#ifdef CONFIG_PM static int wiphy_suspend(struct device *dev, pm_message_t state) { struct cfg80211_registered_device *rdev = dev_to_rdev(dev); @@ -100,10 +101,10 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) rtnl_lock(); if 
(rdev->wiphy.registered) { - if (!rdev->wowlan) + if (!rdev->wiphy.wowlan_config) cfg80211_leave_all(rdev); if (rdev->ops->suspend) - ret = rdev_suspend(rdev, rdev->wowlan); + ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); @@ -132,6 +133,7 @@ static int wiphy_resume(struct device *dev) return ret; } +#endif static const void *wiphy_namespace(struct device *d) { @@ -146,8 +148,10 @@ struct class ieee80211_class = { .dev_release = wiphy_dev_release, .dev_attrs = ieee80211_dev_attrs, .dev_uevent = wiphy_uevent, +#ifdef CONFIG_PM .suspend = wiphy_suspend, .resume = wiphy_resume, +#endif .ns_type = &net_ns_type_operations, .namespace = wiphy_namespace, }; -- cgit v1.2.3 From 0d89d2035fe063461a5ddb609b2c12e7fb006e44 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 23 May 2013 21:02:52 +0000 Subject: MPLS: Add limited GSO support In the case where a non-MPLS packet is received and an MPLS stack is added it may well be the case that the original skb is GSO but the NIC used for transmit does not support GSO of MPLS packets. The aim of this code is to provide GSO in software for MPLS packets whose skbs are GSO. SKB Usage: When an implementation adds an MPLS stack to a non-MPLS packet it should do the following to skb metadata: * Set skb->inner_protocol to the old non-MPLS ethertype of the packet. skb->inner_protocol is added by this patch. * Set skb->protocol to the new MPLS ethertype of the packet. * Set skb->network_header to correspond to the end of the L3 header, including the MPLS label stack. I have posted a patch, "[PATCH v3.29] datapath: Add basic MPLS support to kernel" which adds MPLS support to the kernel datapath of Open vSwitch. That patch sets the above requirements in datapath/actions.c:push_mpls() and was used to exercise this code.
The datapath patch is against the Open vSwitch tree but it is intended that it be added to the Open vSwitch code present in the mainline Linux kernel at some point. Features: I believe that the approach that I have taken is at least partially consistent with the handling of other protocols. Jesse, I understand that you have some ideas here. I am more than happy to change my implementation. This patch adds dev->mpls_features which may be used by devices to advertise features supported for MPLS packets. A new NETIF_F_GSO_MPLS feature is added for devices which support hardware MPLS GSO offload. Currently no devices support this and MPLS GSO always falls back to software. Alternate Implementation: One possible alternate implementation is to teach netif_skb_features() and skb_network_protocol() about MPLS, in a similar way to their understanding of VLANs. I believe this would avoid the need for net/mpls/mpls_gso.c and in particular the calls to __skb_push() and __skb_push() in mpls_gso_segment(). I have decided on the implementation in this patch as it should not introduce any overhead in the case where mpls_gso is not compiled into the kernel or inserted as a module. MPLS GSO suggested by Jesse Gross. Based in part on "v4 GRE: Add TCP segmentation offload for GRE" by Pravin B Shelar. Cc: Jesse Gross Cc: Pravin B Shelar Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- net/Kconfig | 1 + net/Makefile | 1 + net/core/dev.c | 4 ++ net/core/ethtool.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/tcp.c | 1 + net/ipv4/udp.c | 2 +- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 3 +- net/mpls/Kconfig | 9 +++++ net/mpls/Makefile | 4 ++ net/mpls/mpls_gso.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 134 insertions(+), 2 deletions(-) create mode 100644 net/mpls/Kconfig create mode 100644 net/mpls/Makefile create mode 100644 net/mpls/mpls_gso.c (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 08de901415e..523e43e6da1 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -218,6 +218,7 @@ source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" source "net/netlink/Kconfig" +source "net/mpls/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index 091e7b04f30..9492e8cb64e 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ +obj-$(CONFIG_NET_MPLS_GSO) += mpls/ diff --git a/net/core/dev.c b/net/core/dev.c index 50c02ded1d6..2f09cb29cc9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5277,6 +5277,10 @@ int register_netdevice(struct net_device *dev) */ dev->hw_enc_features |= NETIF_F_SG; + /* Make NETIF_F_SG inheritable to MPLS. 
+ */ + dev->mpls_features |= NETIF_F_SG; + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 22efdaa76eb..4e6f63ade74 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d01be2a3ae5..b05ae96aec4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1295,6 +1295,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | 0))) goto out; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d87ce72ca8a..ba4186e1dca 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2917,6 +2917,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03..aa5eff46d13 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2381,7 +2381,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || + SKB_GSO_GRE | SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 71b766ee821..a263b990ee1 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | SKB_GSO_TCPV6 | 
0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 3bb3a891a42..76d401a93c7 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -63,7 +63,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || + SKB_GSO_GRE | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig new file mode 100644 index 00000000000..37421db8896 --- /dev/null +++ b/net/mpls/Kconfig @@ -0,0 +1,9 @@ +# +# MPLS configuration +# +config NET_MPLS_GSO + tristate "MPLS: GSO support" + help + This is helper module to allow segmentation of non-MPLS GSO packets + that have had MPLS stack entries pushed onto them and thus + become MPLS GSO packets. diff --git a/net/mpls/Makefile b/net/mpls/Makefile new file mode 100644 index 00000000000..0a3c171be53 --- /dev/null +++ b/net/mpls/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for MPLS. +# +obj-y += mpls_gso.o diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c new file mode 100644 index 00000000000..1bec1219ab8 --- /dev/null +++ b/net/mpls/mpls_gso.c @@ -0,0 +1,108 @@ +/* + * MPLS GSO Support + * + * Authors: Simon Horman (horms@verge.net.au) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Based on: GSO portions of net/ipv4/gre.c + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t mpls_features; + __be16 mpls_protocol; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE | + SKB_GSO_MPLS))) + goto out; + + /* Setup inner SKB. */ + mpls_protocol = skb->protocol; + skb->protocol = skb->inner_protocol; + + /* Push back the mac header that skb_mac_gso_segment() has pulled. + * It will be re-pulled by the call to skb_mac_gso_segment() below + */ + __skb_push(skb, skb->mac_len); + + /* Segment inner packet. */ + mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, mpls_features); + + + /* Restore outer protocol. */ + skb->protocol = mpls_protocol; + + /* Re-pull the mac header that the call to skb_mac_gso_segment() + * above pulled. It will be re-pushed after returning + * skb_mac_gso_segment(), an indirect caller of this function. 
+ */ + __skb_push(skb, skb->data - skb_mac_header(skb)); + +out: + return segs; +} + +static int mpls_gso_send_check(struct sk_buff *skb) +{ + return 0; +} + +static struct packet_offload mpls_mc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_MC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static struct packet_offload mpls_uc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_UC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static int __init mpls_gso_init(void) +{ + pr_info("MPLS GSO support\n"); + + dev_add_offload(&mpls_uc_offload); + dev_add_offload(&mpls_mc_offload); + + return 0; +} + +static void __exit mpls_gso_exit(void) +{ + dev_remove_offload(&mpls_uc_offload); + dev_remove_offload(&mpls_mc_offload); +} + +module_init(mpls_gso_init); +module_exit(mpls_gso_exit); + +MODULE_DESCRIPTION("MPLS GSO support"); +MODULE_AUTHOR("Simon Horman (horms@verge.net.au)"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From da6e378ba918cd0feeb90eeb84d8b42148bb0c82 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Mon, 27 May 2013 19:53:31 +0000 Subject: netpoll: remove return value from netpoll_rx_disable() The netpoll_rx_disable() will always return 0, it is no use and looks wordy, so remove the unnecessary code and get rid of it in _dev_open and _dev_close. Signed-off-by: Ding Tianhong Signed-off-by: David S. 
Miller --- net/core/dev.c | 15 ++++----------- net/core/netpoll.c | 3 +-- 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2f09cb29cc9..5f747974ac5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1198,9 +1198,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1309,9 +1307,7 @@ static int __dev_close(struct net_device *dev) LIST_HEAD(single); /* Temporarily disable netpoll until the interface is down */ - retval = netpoll_rx_disable(dev); - if (retval) - return retval; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); @@ -1353,14 +1349,11 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); /* Block netpoll rx while the interface is going down */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); dev_close_many(&single); @@ -1368,7 +1361,7 @@ int dev_close(struct net_device *dev) netpoll_rx_enable(dev); } - return ret; + return 0; } EXPORT_SYMBOL(dev_close); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cec074be8c4..37deedd48bc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -247,7 +247,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } -int netpoll_rx_disable(struct net_device *dev) +void netpoll_rx_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -257,7 +257,6 @@ int netpoll_rx_disable(struct net_device *dev) if (ni) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); - return 0; } 
EXPORT_SYMBOL(netpoll_rx_disable); -- cgit v1.2.3 From 095e7999c09afa09345db864427cb4bb4c98ae1c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 20 May 2013 12:20:54 +0530 Subject: net/9p: Make 9P2000.L the default protocol for 9p file system If we don't specify a protocol version, default to 9P2000.L. 9P2000.L has better support for POSIX semantics and is where all the recent development is happening. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 8eb75425e6e..812a4cdf530 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -127,7 +127,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) char *s; int ret = 0; - clnt->proto_version = p9_proto_2000u; + clnt->proto_version = p9_proto_2000L; clnt->msize = 8192; if (!opts) -- cgit v1.2.3 From 535bcd3c4e8d09a62a89b2f1f3b3a80e1b3fce4b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 20 May 2013 12:20:55 +0530 Subject: net/9p: Use virtio transport as the default transport Make the default 9p experience better by defaulting to virtio transport if present. 
These days most of the users are using 9p in a virtualized setup. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 812a4cdf530..5e94dabddb6 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1014,6 +1014,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err < 0) goto destroy_tagpool; + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_trans_by_name("virtio"); + if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); -- cgit v1.2.3 From 42fe6484c639e8f79e09f81cb89f4f69db224997 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 20 May 2013 23:05:15 +0530 Subject: net/9p: Handle error in zero copy request correctly for 9p2000.u For zero copy request, error will be encoded in the user space buffer. So copy the error code correctly using copy_from_user. Here we use the extra bytes we allocate for zero copy request. If total error details are more than P9_ZC_HDR_SZ - 7 bytes, we return -EFAULT. The patch also avoids a memory allocation in the error path. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 55 ++++++++++++++++++------------------------------------- 1 file changed, 18 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 5e94dabddb6..01f1779eba8 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -562,36 +562,19 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, if (!p9_is_proto_dotl(c)) { /* Error is reported in string format */ - uint16_t len; - /* 7 = header size for RERROR, 2 is the size of string len; */ - int inline_len = in_hdrlen - (7 + 2); + int len; + /* 7 = header size for RERROR; */ + int inline_len = in_hdrlen - 7; - /* Read the size of error string */ - err = p9pdu_readf(req->rc, c->proto_version, "w", &len); - if (err) - goto out_err; - - ename = kmalloc(len + 1, GFP_NOFS); - if (!ename) { - err = -ENOMEM; + len = req->rc->size - req->rc->offset; + if (len > (P9_ZC_HDR_SZ - 7)) { + err = -EFAULT; goto out_err; } - if (len <= inline_len) { - /* We have error in protocol buffer itself */ - if (pdu_read(req->rc, ename, len)) { - err = -EFAULT; - goto out_free; - } - } else { - /* - * Part of the data is in user space buffer. 
- */ - if (pdu_read(req->rc, ename, inline_len)) { - err = -EFAULT; - goto out_free; - - } + ename = &req->rc->sdata[req->rc->offset]; + if (len > inline_len) { + /* We have error in external buffer */ if (kern_buf) { memcpy(ename + inline_len, uidata, len - inline_len); @@ -600,19 +583,19 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, uidata, len - inline_len); if (err) { err = -EFAULT; - goto out_free; + goto out_err; } } } - ename[len] = 0; - if (p9_is_proto_dotu(c)) { - /* For dotu we also have error code */ - err = p9pdu_readf(req->rc, - c->proto_version, "d", &ecode); - if (err) - goto out_free; + ename = NULL; + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; + + if (p9_is_proto_dotu(c)) err = -ecode; - } + if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); @@ -628,8 +611,6 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, } return err; -out_free: - kfree(ename); out_err: p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; -- cgit v1.2.3 From 351638e7deeed2ec8ce451b53d33921b3da68f83 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 28 May 2013 01:30:21 +0000 Subject: net: pass info struct via netdevice notifier So far, only net_device * could be passed along with netdevice notifier event. This patch provides a possibility to pass custom structure able to provide info that event listener needs to know. Signed-off-by: Jiri Pirko v2->v3: fix typo on simeth shortened dev_getter shortened notifier_info struct name v1->v2: fix notifier_call parameter in call_netdevice_notifier() Signed-off-by: David S. 
Miller --- net/8021q/vlan.c | 2 +- net/appletalk/aarp.c | 2 +- net/appletalk/ddp.c | 2 +- net/atm/clip.c | 4 +-- net/atm/mpc.c | 6 ++-- net/ax25/af_ax25.c | 6 ++-- net/batman-adv/hard-interface.c | 2 +- net/bridge/br_notify.c | 2 +- net/caif/caif_dev.c | 4 +-- net/caif/caif_usb.c | 4 +-- net/can/af_can.c | 4 +-- net/can/bcm.c | 4 +-- net/can/gw.c | 4 +-- net/can/raw.c | 4 +-- net/core/dev.c | 56 +++++++++++++++++++++++++++++------- net/core/drop_monitor.c | 4 +-- net/core/dst.c | 2 +- net/core/fib_rules.c | 4 +-- net/core/netprio_cgroup.c | 2 +- net/core/pktgen.c | 2 +- net/core/rtnetlink.c | 2 +- net/decnet/af_decnet.c | 4 +-- net/ieee802154/6lowpan.c | 5 ++-- net/ipv4/arp.c | 2 +- net/ipv4/devinet.c | 2 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv6/addrconf.c | 4 +-- net/ipv6/ip6mr.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/ip6t_MASQUERADE.c | 2 +- net/ipv6/route.c | 4 +-- net/ipx/af_ipx.c | 2 +- net/iucv/af_iucv.c | 2 +- net/mac80211/iface.c | 5 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 4 +-- net/netfilter/nfnetlink_queue_core.c | 2 +- net/netfilter/xt_TEE.c | 2 +- net/netlabel/netlabel_unlabeled.c | 7 ++--- net/netrom/af_netrom.c | 2 +- net/openvswitch/dp_notify.c | 2 +- net/packet/af_packet.c | 5 ++-- net/phonet/pn_dev.c | 4 +-- net/rose/af_rose.c | 6 ++-- net/sched/act_mirred.c | 2 +- net/tipc/eth_media.c | 4 +-- net/tipc/ib_media.c | 4 +-- net/wireless/core.c | 5 ++-- net/x25/af_x25.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 51 files changed, 124 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9424f3718ea..2fb2d88e8c2 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -341,7 +341,7 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event) static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = 
netdev_notifier_info_to_dev(ptr); struct vlan_group *grp; struct vlan_info *vlan_info; int i, flgs; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 173a2e82f48..690356fa52b 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -332,7 +332,7 @@ static void aarp_expire_timeout(unsigned long unused) static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ct; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ef12839a7cf..7fee50d637f 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -644,7 +644,7 @@ static inline void atalk_dev_down(struct net_device *dev) static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/atm/clip.c b/net/atm/clip.c index 8ae3a787933..cce241eb01d 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -539,9 +539,9 @@ static int clip_create(int number) } static int clip_device_event(struct notifier_block *this, unsigned long event, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index d4cc1be5c36..3af12755cd0 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -998,14 +998,12 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) } static int mpoa_event_listener(struct notifier_block *mpoa_notifier, - unsigned long event, void *dev_ptr) + unsigned long event, void *ptr) { - struct net_device *dev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpoa_client *mpc; struct lec_priv *priv; - dev = dev_ptr; - if (!net_eq(dev_net(dev), 
&init_net)) return NOTIFY_DONE; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e277e38f736..4b4d2b779ec 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -111,9 +111,9 @@ again: * Handle device status changes. */ static int ax25_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -1974,7 +1974,7 @@ static struct packet_type ax25_packet_type __read_mostly = { }; static struct notifier_block ax25_dev_notifier = { - .notifier_call =ax25_device_event, + .notifier_call = ax25_device_event, }; static int __init ax25_init(void) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 522243aff2f..b6504eac0ed 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -595,7 +595,7 @@ void batadv_hardif_remove_interfaces(void) static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *net_dev = ptr; + struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 1644b3e1f94..3a3f371b284 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -31,7 +31,7 @@ struct notifier_block br_device_notifier = { */ static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; bool changed_addr; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1f9ece1a9c3..4dca159435c 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -352,9 +352,9 @@ 
EXPORT_SYMBOL(caif_enroll_dev); /* notify Caif of device events */ static int caif_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_device_entry *caifd = NULL; struct caif_dev_common *caifdev; struct cfcnfg *cfg; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 942e00a425f..75ed04b78fa 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -121,9 +121,9 @@ static struct packet_type caif_usb_type __read_mostly = { }; static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_dev_common common; struct cflayer *layer, *link_support; struct usbnet *usbnet; diff --git a/net/can/af_can.c b/net/can/af_can.c index c4e50852c9f..3ab8dd2e128 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -794,9 +794,9 @@ EXPORT_SYMBOL(can_proto_unregister); * af_can notifier to create/remove CAN netdevice specific structs */ static int can_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dev_rcv_lists *d; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8f113e6ff32..46f20bfafc0 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1350,9 +1350,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, * notification handler for netdevice status changes */ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier); struct sock *sk = &bo->sk; struct bcm_op 
*op; diff --git a/net/can/gw.c b/net/can/gw.c index 3ee690e8c7d..2f291f961a1 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -445,9 +445,9 @@ static inline void cgw_unregister_filter(struct cgw_job *gwj) } static int cgw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/can/raw.c b/net/can/raw.c index 1085e65f848..641e1c89512 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -239,9 +239,9 @@ static int raw_enable_allfilters(struct net_device *dev, struct sock *sk) } static int raw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; diff --git a/net/core/dev.c b/net/core/dev.c index 5f747974ac5..54fce6006a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1391,6 +1391,20 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +static void netdev_notifier_info_init(struct netdev_notifier_info *info, + struct net_device *dev) +{ + info->dev = dev; +} + +static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, + struct net_device *dev) +{ + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return nb->notifier_call(nb, val, &info); +} static int dev_boot_phase = 1; @@ -1423,7 +1437,7 @@ int register_netdevice_notifier(struct notifier_block *nb) goto unlock; for_each_net(net) { for_each_netdev(net, dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); err = notifier_to_errno(err); if (err) goto rollback; @@ -1431,7 +1445,7 @@ int 
register_netdevice_notifier(struct notifier_block *nb) if (!(dev->flags & IFF_UP)) continue; - nb->notifier_call(nb, NETDEV_UP, dev); + call_netdevice_notifier(nb, NETDEV_UP, dev); } } @@ -1447,10 +1461,11 @@ rollback: goto outroll; if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } @@ -1488,10 +1503,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb) for_each_net(net) { for_each_netdev(net, dev) { if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } unlock: @@ -1500,6 +1516,25 @@ unlock: } EXPORT_SYMBOL(unregister_netdevice_notifier); +/** + * call_netdevice_notifiers_info - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @info: notifier information data + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). 
+ */ + +int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, + struct netdev_notifier_info *info) +{ + ASSERT_RTNL(); + netdev_notifier_info_init(info, dev); + return raw_notifier_call_chain(&netdev_chain, val, info); +} +EXPORT_SYMBOL(call_netdevice_notifiers_info); + /** * call_netdevice_notifiers - call all network notifier blocks * @val: value passed unmodified to notifier function @@ -1511,8 +1546,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - ASSERT_RTNL(); - return raw_notifier_call_chain(&netdev_chain, val, dev); + struct netdev_notifier_info info; + + return call_netdevice_notifiers_info(val, dev, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d23b6682f4e..5e78d44333b 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb, } static int dropmon_net_event(struct notifier_block *ev_block, - unsigned long event, void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *tmp; diff --git a/net/core/dst.c b/net/core/dst.c index df9cc810ec8..ca4231ec734 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dst_entry *dst, *last = NULL; switch (event) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d5a9f8ead0d..21735440c44 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -705,9 +705,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) static 
int fib_rules_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct fib_rules_ops *ops; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 0777d0aa18c..e533259dce3 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -261,7 +261,7 @@ struct cgroup_subsys net_prio_subsys = { static int netprio_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netprio_map *old; /* diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 11f2704c381..795498fd458 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1921,7 +1921,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); if (pn->pktgen_exiting) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a08bd2b7fe3..49c14451d8a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2667,7 +2667,7 @@ static void rtnetlink_rcv(struct sk_buff *skb) static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_UP: diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index c21f200eed9..dd4d506ef92 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2078,9 +2078,9 @@ out_err: } static int dn_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + 
struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 55e1fd5b3e5..3b9d5f20bd1 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1352,10 +1352,9 @@ static inline void lowpan_netlink_fini(void) } static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); LIST_HEAD(del_list); struct lowpan_dev_record *entry, *tmp; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 247ec1951c3..bf574029a18 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1234,7 +1234,7 @@ out: static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_CHANGEADDR: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b..b047e2d8a61 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1333,7 +1333,7 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7629a209f9..05a4888dede 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1038,7 +1038,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; struct net *net = dev_net(dev); diff 
--git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d9610ae785..f975399f352 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1609,7 +1609,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr_table *mrt; struct vif_device *v; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 5d5d4d1be9c..dd5508bde79 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -108,7 +108,7 @@ static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 432e084b6b6..bce073b4bbd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2826,9 +2826,9 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) } static int addrconf_notify(struct notifier_block *this, unsigned long event, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *) data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fc..583e8d435f9 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1319,7 +1319,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr6_table *mrt; struct mif_device *v; diff --git 
a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2712ab22a17..a0962697a25 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1568,7 +1568,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct inet6_dev *idev; diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 60e9053bab0..b76257cd7e1 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -71,7 +71,7 @@ static int device_cmp(struct nf_conn *ct, void *ifindex) static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad0aa6b0b86..194c3cde153 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2681,9 +2681,9 @@ errout: } static int ip6_route_dev_notify(struct notifier_block *this, - unsigned long event, void *data) + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f547a47d381..7a1e0fc1bd4 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -330,7 +330,7 @@ static __inline__ void __ipxitf_put(struct ipx_interface *intrfc) static int ipxitf_device_event(struct notifier_block *notifier, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ipx_interface *i, *tmp; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/iucv/af_iucv.c 
b/net/iucv/af_iucv.c index ae691651b72..168aff5e60d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2293,7 +2293,7 @@ out_unlock: static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *event_dev = (struct net_device *)ptr; + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); struct sock *sk; struct iucv_sock *iucv; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 60f1ce5e5e5..d2c3fd178db 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1717,10 +1717,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } static int netdev_notify(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ieee80211_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5b142fb1648..7c3ed429789 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1487,9 +1487,9 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) * Currently only NETDEV_DOWN is handled to release refs to cached dsts */ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_service *svc; diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 4e27fa03581..0f2ac8f2e7b 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -800,7 +800,7 @@ static int nfqnl_rcv_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); 
/* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index bd93e51d30a..292934d2348 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -200,7 +200,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct xt_tee_priv *priv; priv = container_of(this, struct xt_tee_priv, notifier); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 8a6c6ea466d..af3531926ee 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -708,7 +708,7 @@ unlhsh_remove_return: * netlbl_unlhsh_netdev_handler - Network device notification handler * @this: notifier block * @event: the event - * @ptr: the network device (cast to void) + * @ptr: the netdevice notifier info (cast to void) * * Description: * Handle network device events, although at present all we care about is a @@ -717,10 +717,9 @@ unlhsh_remove_return: * */ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netlbl_unlhsh_iface *iface = NULL; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec0c80fde69..698814bfa7a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -117,7 +117,7 @@ static void nr_kill_by_device(struct net_device *dev) */ static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff 
--git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index ef4feec6cd8..c3235675f35 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -78,7 +78,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct ovs_net *ovs_net; - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vport *vport = NULL; if (!ovs_is_internal_dev(dev)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8ec1bca7f85..79fe63246b2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3331,10 +3331,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } -static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) +static int packet_notifier(struct notifier_block *this, + unsigned long msg, void *ptr) { struct sock *sk; - struct net_device *dev = data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 45a7df6575d..56a6146ac94 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -292,9 +292,9 @@ static void phonet_route_autodel(struct net_device *dev) /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (what) { case NETDEV_REGISTER: diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9c834745159..e98fcfbe600 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -202,10 +202,10 @@ static void rose_kill_by_device(struct net_device *dev) /* * Handle device status changes. 
*/ -static int rose_device_event(struct notifier_block *this, unsigned long event, - void *ptr) +static int rose_device_event(struct notifier_block *this, + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5d676edc22a..977c10e0631 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -243,7 +243,7 @@ nla_put_failure: static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; if (event == NETDEV_UNREGISTER) diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 120a676a336..fc60bea6316 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -251,9 +251,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct eth_bearer *eb_ptr = &eth_bearers[0]; struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 2a2864c25e1..baa9df4327d 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -244,9 +244,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. 
*/ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; diff --git a/net/wireless/core.c b/net/wireless/core.c index 73405e00c80..01e41191f1b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -886,10 +886,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; int ret; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 37ca9694aab..1d964e23853 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -224,7 +224,7 @@ static void x25_kill_by_device(struct net_device *dev) static int x25_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct x25_neigh *nb; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 23cea0f7433..536ccc95de8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2784,7 +2784,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net) static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_DOWN: -- cgit v1.2.3 From be9efd3653284f2827fd82861e8e9db9a8f726e1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 28 May 2013 01:30:22 +0000 Subject: net: pass changed flags along with NETDEV_CHANGE event 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use new netdevice notifier infrastructure to pass along changed flags. Signed-off-by: Timo Teräs Signed-off-by: Jiri Pirko v2->v3: shortened notifier_info struct name Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 54fce6006a8..6eb621cc3b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4771,8 +4771,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) } if (dev->flags & IFF_UP && - (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); + (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = changes; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); + } } /** -- cgit v1.2.3 From 6c8b4e3ff81b82fc153625e81e60af1d89de2c32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Tue, 28 May 2013 01:30:23 +0000 Subject: arp: flush arp cache on IFF_NOARP change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IFF_NOARP affects what kind of neighbor entries are created (nud NOARP or nud INCOMPLETE). If the flag changes, flush the arp cache to refresh all entries. Signed-off-by: Timo Teräs Signed-off-by: Jiri Pirko v2->v3: shortened notifier_info struct name Signed-off-by: David S. 
Miller --- net/ipv4/arp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index bf574029a18..4429b013f26 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1235,12 +1235,18 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); rt_cache_flush(dev_net(dev)); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&arp_tbl, dev); + break; default: break; } -- cgit v1.2.3 From 06ecf24bdf2b7afc6c8fd13de6dba2a96dd331b6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 28 May 2013 13:15:50 -0700 Subject: net: Fix build warnings after mac_header and transport_header became __u16. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/core/skbuff.c: In function ‘__alloc_skb_head’: net/core/skbuff.c:203:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] net/core/skbuff.c: In function ‘__alloc_skb’: net/core/skbuff.c:279:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] net/core/skbuff.c:280:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] net/core/skbuff.c: In function ‘build_skb’: net/core/skbuff.c:348:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] net/core/skbuff.c:349:2: warning: large integer implicitly truncated to unsigned type [-Woverflow] Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d6298914f4e..f45de077ab9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -200,7 +200,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) atomic_set(&skb->users, 1); #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; + skb->mac_header = (__u16) ~0U; #endif out: return skb; @@ -276,8 +276,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb_reset_tail_pointer(skb); skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; + skb->mac_header = (__u16) ~0U; + skb->transport_header = (__u16) ~0U; #endif /* make sure we initialize shinfo sequentially */ @@ -345,8 +345,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb_reset_tail_pointer(skb); skb->end = skb->tail + size; #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; + skb->mac_header = (__u16) ~0U; + skb->transport_header = (__u16) ~0U; #endif /* make sure we initialize shinfo sequentially */ -- cgit v1.2.3 From de68d1003d9eb0a5f7d4714315614e4bc956f68e Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sat, 9 Mar 2013 23:14:22 +0100 Subject: batman-adv: split batadv_is_wifi_iface() into two functions Previously batadv_is_wifi_iface() did two things at once: looking up a net_device from an interface index, and determining if it is a wifi device. The second part is useful itself when the caller already has a net_device reference. 
Signed-off-by: Matthias Schiffer Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 84 +++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b6504eac0ed..d5ec67b6325 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -117,6 +117,58 @@ static int batadv_is_valid_iface(const struct net_device *net_dev) return 1; } +/** + * batadv_is_wifi_netdev - check if the given net_device struct is a wifi + * interface + * @net_device: the device to check + * + * Returns true if the net device is a 802.11 wireless device, false otherwise. + */ +static bool batadv_is_wifi_netdev(struct net_device *net_device) +{ +#ifdef CONFIG_WIRELESS_EXT + /* pre-cfg80211 drivers have to implement WEXT, so it is possible to + * check for wireless_handlers != NULL + */ + if (net_device->wireless_handlers) + return true; +#endif + + /* cfg80211 drivers have to set ieee80211_ptr */ + if (net_device->ieee80211_ptr) + return true; + + return false; +} + +/** + * batadv_is_wifi_iface - check if the given interface represented by ifindex + * is a wifi interface + * @ifindex: interface index to check + * + * Returns true if the interface represented by ifindex is a 802.11 wireless + * device, false otherwise. 
+ */ +bool batadv_is_wifi_iface(int ifindex) +{ + struct net_device *net_device = NULL; + bool ret = false; + + if (ifindex == BATADV_NULL_IFINDEX) + goto out; + + net_device = dev_get_by_index(&init_net, ifindex); + if (!net_device) + goto out; + + ret = batadv_is_wifi_netdev(net_device); + +out: + if (net_device) + dev_put(net_device); + return ret; +} + static struct batadv_hard_iface * batadv_hardif_get_active(const struct net_device *soft_iface) { @@ -657,38 +709,6 @@ out: return NOTIFY_DONE; } -/* This function returns true if the interface represented by ifindex is a - * 802.11 wireless device - */ -bool batadv_is_wifi_iface(int ifindex) -{ - struct net_device *net_device = NULL; - bool ret = false; - - if (ifindex == BATADV_NULL_IFINDEX) - goto out; - - net_device = dev_get_by_index(&init_net, ifindex); - if (!net_device) - goto out; - -#ifdef CONFIG_WIRELESS_EXT - /* pre-cfg80211 drivers have to implement WEXT, so it is possible to - * check for wireless_handlers != NULL - */ - if (net_device->wireless_handlers) - ret = true; - else -#endif - /* cfg80211 drivers have to set ieee80211_ptr */ - if (net_device->ieee80211_ptr) - ret = true; -out: - if (net_device) - dev_put(net_device); - return ret; -} - struct notifier_block batadv_hard_if_notifier = { .notifier_call = batadv_hard_if_event, }; -- cgit v1.2.3 From caf65bfcc5dbabd7222fa45fdcd42ce0783d7a42 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sat, 9 Mar 2013 23:14:23 +0100 Subject: batman-adv: send each broadcast only once on non-wireless interfaces While it makes sense to send each broadcast thrice on 802.11 (WLAN) interfaces as broadcasts are often unreliable on these, there is no reason to do so on other interface types. The increased overhead can be harmful on low-bandwidth links like VPN connections over slow internet lines, therefore it is better to reduce the number of broadcast packets sent on non-wireless links to one. 
Signed-off-by: Matthias Schiffer Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 4 ++++ net/batman-adv/main.h | 5 +++++ net/batman-adv/send.c | 5 ++++- net/batman-adv/types.h | 2 ++ 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index d5ec67b6325..4a76ed654c9 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -593,6 +593,10 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); + hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; + if (batadv_is_wifi_netdev(net_dev)) + hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; + /* extra reference for return */ atomic_set(&hard_iface->refcount, 2); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 59a0d6af15c..ea1a3bafe9c 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -76,6 +76,11 @@ #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ +/* number of packets to send for broadcasts on different interface types */ +#define BATADV_NUM_BCASTS_DEFAULT 1 +#define BATADV_NUM_BCASTS_WIRELESS 3 +#define BATADV_NUM_BCASTS_MAX 3 + /* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ #define ARP_REQ_DELAY 250 /* numbers of originator to contact for any PUT/GET DHT operation */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 263cfd1ccee..eb16b04d4be 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -260,6 +260,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (hard_iface->soft_iface != soft_iface) continue; + if (forw_packet->num_packets >= hard_iface->num_bcasts) + continue; + /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb1) @@ -271,7 +274,7 @@ static void batadv_send_outstanding_bcast_packet(struct 
work_struct *work) forw_packet->num_packets++; /* if we still have some more bcasts to send */ - if (forw_packet->num_packets < 3) { + if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, msecs_to_jiffies(5)); return; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index aba8364c368..5f542bdd9a4 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -61,6 +61,7 @@ struct batadv_hard_iface_bat_iv { * @if_status: status of the interface for batman-adv * @net_dev: pointer to the net_device * @frag_seqno: last fragment sequence number sent by this interface + * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) * @hardif_obj: kobject of the per interface sysfs "mesh" directory * @refcount: number of contexts the object is used * @batman_adv_ptype: packet type describing packets that should be processed by @@ -76,6 +77,7 @@ struct batadv_hard_iface { char if_status; struct net_device *net_dev; atomic_t frag_seqno; + uint8_t num_bcasts; struct kobject *hardif_obj; atomic_t refcount; struct packet_type batman_adv_ptype; -- cgit v1.2.3 From 93178018eb35aaa6ebcb5a136dce7eb3add011ab Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sun, 10 Mar 2013 19:29:15 +0800 Subject: batman-adv: fix typos in kernel doc & comments Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/distributed-arp-table.c | 82 ++++++++++++++++------------------ 1 file changed, 39 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 239992021b1..06345d40158 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -45,9 +45,9 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly + * batadv_dat_entry_free_ref - decrement the dat_entry 
refcounter and possibly * free it - * @dat_entry: the oentry to free + * @dat_entry: the entry to free */ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) { @@ -56,10 +56,10 @@ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) } /** - * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not + * batadv_dat_to_purge - check whether a dat_entry has to be purged or not * @dat_entry: the entry to check * - * Returns true if the entry has to be purged now, false otherwise + * Returns true if the entry has to be purged now, false otherwise. */ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) { @@ -75,8 +75,8 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) * returns a boolean value: true is the entry has to be deleted, * false otherwise * - * Loops over each entry in the DAT local storage and delete it if and only if - * the to_purge function passed as argument returns true + * Loops over each entry in the DAT local storage and deletes it if and only if + * the to_purge function passed as argument returns true. */ static void __batadv_dat_purge(struct batadv_priv *bat_priv, bool (*to_purge)(struct batadv_dat_entry *)) @@ -97,7 +97,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, spin_lock_bh(list_lock); hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { - /* if an helper function has been passed as parameter, + /* if a helper function has been passed as parameter, * ask it if the entry has to be purged or not */ if (to_purge && !to_purge(dat_entry)) @@ -134,7 +134,7 @@ static void batadv_dat_purge(struct work_struct *work) * @node: node in the local table * @data2: second object to compare the node to * - * Returns 1 if the two entry are the same, 0 otherwise + * Returns 1 if the two entries are the same, 0 otherwise. 
*/ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) { @@ -149,7 +149,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_src field in the ARP packet + * Returns the value of the hw_src field in the ARP packet. */ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) { @@ -166,7 +166,7 @@ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_src field in the ARP packet + * Returns the value of the ip_src field in the ARP packet. */ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) { @@ -178,7 +178,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_dst field in the ARP packet + * Returns the value of the hw_dst field in the ARP packet. */ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) { @@ -190,7 +190,7 @@ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_dst field in the ARP packet + * Returns the value of the ip_dst field in the ARP packet. */ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) { @@ -202,7 +202,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) * @data: data to hash * @size: size of the hash table * - * Returns the selected index in the hash table for the given data + * Returns the selected index in the hash table for the given data. 
*/ static uint32_t batadv_hash_dat(const void *data, uint32_t size) { @@ -224,12 +224,12 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size) } /** - * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash + * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash * table * @bat_priv: the bat priv with all the soft interface information * @ip: search key * - * Returns the dat_entry if found, NULL otherwise + * Returns the dat_entry if found, NULL otherwise. */ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) @@ -343,9 +343,6 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, if (hdr_size == 0) return; - /* if the ARP packet is encapsulated in a batman packet, let's print - * some debug messages - */ unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; switch (unicast_4addr_packet->u.header.packet_type) { @@ -409,7 +406,8 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, * @candidate: orig_node under evaluation * @max_orig_node: last selected candidate * - * Returns true if the node has been elected as next candidate or false othrwise + * Returns true if the node has been elected as next candidate or false + * otherwise. 
*/ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, int select, batadv_dat_addr_t tmp_max, @@ -472,7 +470,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, */ cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND; - /* iterate over the originator list and find the node with closest + /* iterate over the originator list and find the node with the closest * dat_address which has not been selected yet */ for (i = 0; i < hash->size; i++) { @@ -480,7 +478,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - /* the dht space is a ring and addresses are unsigned */ + /* the dht space is a ring using unsigned addresses */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -512,7 +510,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, } /** - * batadv_dat_select_candidates - selects the nodes which the DHT message has to + * batadv_dat_select_candidates - select the nodes which the DHT message has to * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT @@ -521,7 +519,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * closest values (from the LEFT, with wrap around if needed) then the hash * value of the key. ip_dst is the key. * - * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM + * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. 
*/ static struct batadv_dat_candidate * batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) @@ -558,10 +556,11 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @ip: the DHT key * @packet_subtype: unicast4addr packet subtype to use * - * In this function the skb is copied by means of pskb_copy() and is sent as - * unicast packet to each of the selected candidates + * This function copies the skb with pskb_copy() and is sent as unicast packet + * to each of the selected candidates. * - * Returns true if the packet is sent to at least one candidate, false otherwise + * Returns true if the packet is sent to at least one candidate, false + * otherwise. */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, @@ -727,7 +726,7 @@ out: * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb * - * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise + * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. 
*/ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -754,9 +753,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN); - /* Check whether the ARP packet carries a valid - * IP information - */ + /* check whether the ARP packet carries a valid IP information */ if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) goto out; @@ -784,7 +781,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) goto out; - /* we don't care about the destination MAC address in ARP requests */ + /* don't care about the destination MAC address in ARP requests */ if (arphdr->ar_op != htons(ARPOP_REQUEST)) { hw_dst = batadv_arp_hw_dst(skb, hdr_size); if (is_zero_ether_addr(hw_dst) || @@ -804,8 +801,8 @@ out: * @skb: packet to check * * Returns true if the message has been sent to the dht candidates, false - * otherwise. In case of true the message has to be enqueued to permit the - * fallback + * otherwise. In case of a positive return value the message has to be enqueued + * to permit the fallback. */ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -867,7 +864,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); ret = true; } else { - /* Send the request on the DHT */ + /* Send the request to the DHT */ ret = batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_GET); } @@ -884,7 +881,7 @@ out: * @skb: packet to check * @hdr_size: size of the encapsulation header * - * Returns true if the request has been answered, false otherwise + * Returns true if the request has been answered, false otherwise. 
*/ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -924,10 +921,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; - /* to preserve backwards compatibility, here the node has to answer - * using the same packet type it received for the request. This is due - * to that if a node is not using the 4addr packet format it may not - * support it. + /* To preserve backwards compatibility, the node has choose the outgoing + * format based on the incoming request packet type. The assumption is + * that a node not using the 4addr packet format doesn't support it. */ if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, @@ -977,7 +973,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); /* Send the ARP reply to the candidates for both the IP addresses that - * the node got within the ARP reply + * the node obtained from the ARP reply */ batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); @@ -987,7 +983,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, * DAT storage only * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check - * @hdr_size: siaze of the encapsulation header + * @hdr_size: size of the encapsulation header */ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -1031,11 +1027,11 @@ out: /** * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped - * (because the node has already got the reply via DAT) or not + * (because the node has already obtained the reply via DAT) or not * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the broadcast packet * - * Returns true if the 
node can drop the packet, false otherwise + * Returns true if the node can drop the packet, false otherwise. */ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet) -- cgit v1.2.3 From 863dd7a82a2fba3e1a094c3d10a9cc8d1afd10d6 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 25 Mar 2013 13:49:46 +0100 Subject: batman-adv: drop useless argument seqno in neighbor creation the sequence number is not stored in struct neigh_node, therefore there is no need to pass such value to the neigh_node creation procedure. At the moment the value is only used by a debug message, but given the fact that the seqno is not related to the neighbor object, it is better to print it elsewhere. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 11 ++++------- net/batman-adv/originator.c | 5 ++--- net/batman-adv/originator.h | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 071f288b77a..da239c5424b 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -33,12 +33,11 @@ static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh, __be32 seqno) + struct batadv_orig_node *orig_neigh) { struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, - ntohl(seqno)); + neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr); if (!neigh_node) goto out; @@ -696,8 +695,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, ethhdr->h_source, - orig_node, orig_tmp, - batadv_ogm_packet->seqno); + orig_node, orig_tmp); batadv_orig_node_free_ref(orig_tmp); if (!neigh_node) @@ -829,8 +827,7 @@ static int batadv_iv_ogm_calc_tq(struct 
batadv_orig_node *orig_node, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, orig_neigh_node->orig, orig_neigh_node, - orig_neigh_node, - batadv_ogm_packet->seqno); + orig_neigh_node); if (!neigh_node) goto out; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index fad1a2093e1..ddf56348479 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -92,7 +92,7 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node) struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno) + const uint8_t *neigh_addr) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_neigh_node *neigh_node; @@ -110,8 +110,7 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, atomic_set(&neigh_node->refcount, 2); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM, initial seqno %d\n", - neigh_addr, seqno); + "Creating new neighbor %pM\n", neigh_addr); out: return neigh_node; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 734e5a3d8a5..7887b84a9af 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -31,7 +31,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno); + const uint8_t *neigh_addr); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * batadv_orig_node_get_router(struct batadv_orig_node *orig_node); -- cgit v1.2.3 From d1dc30739c587fe65f4120c045258ab01c79db1b Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 25 Mar 2013 13:54:45 +0100 Subject: batman-adv: slightly improve neighbor creation debug message print the interface along with the new neighbor mac address Signed-off-by: Antonio Quartulli Signed-off-by: Marek 
Lindner --- net/batman-adv/originator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ddf56348479..f50553a7de6 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -110,7 +110,8 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, atomic_set(&neigh_node->refcount, 2); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM\n", neigh_addr); + "Creating new neighbor %pM on interface %s\n", neigh_addr, + hard_iface->net_dev->name); out: return neigh_node; -- cgit v1.2.3 From 281581d3e79eaacfcdc0827e9bf990422252ba5c Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 25 Mar 2013 22:27:00 +0100 Subject: batman-adv: don't check the source address twice The source address has already been checked in batadv_check_management_packet() upon packet reception and therefore it does not need to be checked again in ogm_process() Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index da239c5424b..6d62e2992df 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -988,7 +988,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, struct batadv_neigh_node *orig_neigh_router = NULL; int has_directlink_flag; int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_broadcast = 0, is_bidirect; + int is_bidirect; bool is_single_hop_neigh = false; bool is_from_best_next_hop = false; int is_duplicate, sameseq, simlar_ttl; @@ -1051,9 +1051,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (batadv_compare_eth(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; - - if (is_broadcast_ether_addr(ethhdr->h_source)) - is_broadcast = 1; } 
rcu_read_unlock(); @@ -1071,13 +1068,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, return; } - if (is_broadcast) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all packets with broadcast source addr (sender: %pM)\n", - ethhdr->h_source); - return; - } - if (is_my_orig) { unsigned long *word; int offset; -- cgit v1.2.3 From a3b81b67de6ae0475c0c34c552c059f7bea2f520 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 28 Mar 2013 14:21:12 +0100 Subject: batman-adv: don't check compat version twice Compatibility version is checked upon packet reception before calling any handler. For this reason it does not need to be checked once more in the handler itself. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 6d62e2992df..38183dc6965 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1054,13 +1054,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, } rcu_read_unlock(); - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); - return; - } - if (is_my_addr) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: received my own broadcast (sender: %pM)\n", -- cgit v1.2.3 From 38dc40ef52d882e08b2af71fc1b5413ac7009952 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 30 Mar 2013 17:22:00 +0100 Subject: batman-adv: do not silently ignore wrong condition Only one neigh_node per orig_node should match a given neighbor address, therefore, if more than one matching neigh_node is found, a WARNING has to be triggered to let the user know that something is wrong in the originator state instead of silently skipping the error.
Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 38183dc6965..bd50e0d76ae 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -669,7 +669,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && tmp_neigh_node->if_incoming == if_incoming && atomic_inc_not_zero(&tmp_neigh_node->refcount)) { - if (neigh_node) + if (WARN(neigh_node, "too many matching neigh_nodes")) batadv_neigh_node_free_ref(neigh_node); neigh_node = tmp_neigh_node; continue; -- cgit v1.2.3 From 7db3fc291bb22bf43667b009dd0e701ed4eb7c96 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 2 Apr 2013 12:16:53 +0200 Subject: batman-adv: don't initialise batman_iv private members in hard-interface.c hard-interface.c must not contain any routing algorithm specific code.
Allocate the hard-interface with kzalloc() and remove any useless and algorithm specific member initialisation Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/hard-interface.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4a76ed654c9..c478e6bcf89 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -577,7 +577,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); + hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) goto release_dev; @@ -603,12 +603,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_check_known_mac_addr(hard_iface->net_dev); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); - /* This can't be called via a bat_priv callback because - * we have no bat_priv yet. - */ - atomic_set(&hard_iface->bat_iv.ogm_seqno, 1); - hard_iface->bat_iv.ogm_buff = NULL; - return hard_iface; free_if: -- cgit v1.2.3 From 7ed4be9523455a061e62236dc3caa9211cd7edda Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 8 Apr 2013 15:08:18 +0200 Subject: batman-adv: use eth_hdr() when it makes sense Instead of casting the result of skb_mac_header() to "struct ethhdr *" every time, the eth_hdr inline function can be used to beautify the code and improve its readability.
Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 2 +- net/batman-adv/bridge_loop_avoidance.c | 8 ++++---- net/batman-adv/network-coding.c | 10 +++++----- net/batman-adv/routing.c | 12 ++++++------ net/batman-adv/send.c | 2 +- net/batman-adv/soft-interface.c | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index bd50e0d76ae..ef41be49b31 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1268,7 +1268,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, skb->len + ETH_HLEN); packet_len = skb_headlen(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); packet_buff = skb->data; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 379061c7254..082189e2e40 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -864,7 +864,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, short vid = -1; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { vhdr = (struct vlan_ethhdr *)ethhdr; @@ -885,7 +885,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, return 0; /* pskb_may_pull() may have modified the pointers, get ethhdr again */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen); /* Check whether the ARP frame carries a valid @@ -1432,7 +1432,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, struct batadv_hard_iface *primary_if; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -1539,7 +1539,7 @@ 
int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) if (batadv_bla_process_claim(bat_priv, primary_if, skb)) goto handled; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index e84629ece9b..0787a34609b 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1245,7 +1245,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, return; /* Set the mac header as if we actually sent the packet uncoded */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); @@ -1423,7 +1423,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; @@ -1482,7 +1482,7 @@ out: void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb) { - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return; @@ -1533,7 +1533,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, skb_reset_network_header(skb); /* Reconstruct original mac header */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr)); /* Select the correct unicast header information based on the location @@ -1677,7 +1677,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, return NET_RX_DROP; coded_packet = (struct batadv_coded_packet *)skb->data; - ethhdr = (struct ethhdr
*)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* Verify frame is destined for us */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b27a4d792d1..beeab2e8cd6 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -256,7 +256,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, header_len))) return false; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -392,7 +392,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -569,7 +569,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, if (unlikely(!pskb_may_pull(skb, hdr_size))) return -ENODATA; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -803,7 +803,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL; struct batadv_neigh_node *neigh_node = NULL; struct batadv_unicast_packet *unicast_packet; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); int ret = NET_RX_DROP; struct sk_buff *new_skb; @@ -1165,7 +1165,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -1265,7 +1265,7 @@ int batadv_recv_vis_packet(struct sk_buff *skb, 
return NET_RX_DROP; vis_packet = (struct batadv_vis_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* not for me */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index eb16b04d4be..ed7072ad84e 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -61,7 +61,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 819dfb006cd..b26a6cdb934 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -316,7 +316,7 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: -- cgit v1.2.3 From 24a5deeb8a198f0a26ae04485d9976c5e414f723 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 8 Apr 2013 09:38:12 +0200 Subject: batman-adv: move ring_buffer helper functions in bat_iv_ogm the two lonely ring_buffer helper functions are used by the bat_iv_ogm module only and therefore they can be moved inside it. 
Reported-by: Marek Lindner Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/Makefile | 1 - net/batman-adv/bat_iv_ogm.c | 43 ++++++++++++++++++++++++++++++++++++- net/batman-adv/ring_buffer.c | 51 -------------------------------------------- net/batman-adv/ring_buffer.h | 27 ----------------------- 4 files changed, 42 insertions(+), 80 deletions(-) delete mode 100644 net/batman-adv/ring_buffer.c delete mode 100644 net/batman-adv/ring_buffer.h (limited to 'net') diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index acbac2a9c62..489bb36f1b9 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -32,7 +32,6 @@ batman-adv-y += icmp_socket.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o -batman-adv-y += ring_buffer.o batman-adv-y += routing.o batman-adv-y += send.o batman-adv-y += soft-interface.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index ef41be49b31..31c2891c2cd 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -19,7 +19,6 @@ #include "main.h" #include "translation-table.h" -#include "ring_buffer.h" #include "originator.h" #include "routing.h" #include "gateway_common.h" @@ -29,6 +28,48 @@ #include "bat_algo.h" #include "network-coding.h" +/** + * batadv_ring_buffer_set - update the ring buffer with the given value + * @lq_recv: pointer to the ring buffer + * @lq_index: index to store the value at + * @value: value to store in the ring buffer + */ +static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, + uint8_t value) +{ + lq_recv[*lq_index] = value; + *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; +} + +/** + * batadv_ring_buffer_avg - compute the average of all non-zero values stored + * in the given ring buffer + * @lq_recv: pointer to the ring buffer + * + * Returns computed average value.
+ */ +static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) +{ + const uint8_t *ptr; + uint16_t count = 0, i = 0, sum = 0; + + ptr = lq_recv; + + while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { + if (*ptr != 0) { + count++; + sum += *ptr; + } + + i++; + ptr++; + } + + if (count == 0) + return 0; + + return (uint8_t)(sum / count); +} static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c deleted file mode 100644 index ccab0bbdbb5..00000000000 --- a/net/batman-adv/ring_buffer.c +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#include "main.h" -#include "ring_buffer.h" - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value) -{ - lq_recv[*lq_index] = value; - *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; -} - -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) -{ - const uint8_t *ptr; - uint16_t count = 0, i = 0, sum = 0; - - ptr = lq_recv; - - while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { - if (*ptr != 0) { - count++; - sum += *ptr; - } - - i++; - ptr++; - } - - if (count == 0) - return 0; - - return (uint8_t)(sum / count); -} diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h deleted file mode 100644 index 3f92ae248e8..00000000000 --- a/net/batman-adv/ring_buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_ -#define _NET_BATMAN_ADV_RING_BUFFER_H_ - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value); -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]); - -#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */ -- cgit v1.2.3 From d98966173213704873864c4e5057d823996ae95d Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 17 Apr 2013 17:44:43 +0200 Subject: batman-adv: move batadv_slide_own_bcast_window to bat_iv_ogm.c batadv_slide_own_bcast_window() is used only in bat_iv_ogm.c and it is currently touching only batman_iv specific attributes. Move it into bat_iv_ogm.c and make it static. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 37 ++++++++++++++++++++++++++++++++++++- net/batman-adv/routing.c | 29 ----------------------------- net/batman-adv/routing.h | 1 - 3 files changed, 36 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 31c2891c2cd..42b7a94d61b 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -630,6 +630,41 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, if_incoming, 0, batadv_iv_ogm_fwd_send_time()); } +/** + * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for + * the given interface + * @hard_iface: the interface for which the windows have to be shifted + */ +static void +batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + unsigned long 
*word; + uint32_t i; + size_t word_index; + uint8_t *w; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + spin_lock_bh(&orig_node->ogm_cnt_lock); + word_index = hard_iface->if_num * BATADV_NUM_WORDS; + word = &(orig_node->bcast_own[word_index]); + + batadv_bit_get_packet(bat_priv, word, 1, 0); + w = &orig_node->bcast_own_sum[hard_iface->if_num]; + *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); + spin_unlock_bh(&orig_node->ogm_cnt_lock); + } + rcu_read_unlock(); + } +} + static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -674,7 +709,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS; } - batadv_slide_own_bcast_window(hard_iface); + batadv_iv_ogm_slide_own_bcast_window(hard_iface); batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, batadv_iv_ogm_emit_send_time(bat_priv)); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index beeab2e8cd6..fad08469767 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -34,35 +34,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) -{ - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - unsigned long *word; - uint32_t i; - size_t word_index; - uint8_t *w; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - spin_lock_bh(&orig_node->ogm_cnt_lock); - word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = 
&(orig_node->bcast_own[word_index]); - - batadv_bit_get_packet(bat_priv, word, 1, 0); - w = &orig_node->bcast_own_sum[hard_iface->if_num]; - *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_node->ogm_cnt_lock); - } - rcu_read_unlock(); - } -} - static void _batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 99eeafaba40..72a29bde201 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -20,7 +20,6 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ #define _NET_BATMAN_ADV_ROUTING_H_ -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface); bool batadv_check_management_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, int header_len); -- cgit v1.2.3 From 3abe4adbfb293e37d2d6f4fe22366534dc2675d9 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 3 Apr 2013 11:15:33 +0200 Subject: batman-adv: refactor batadv_tt_local_event() Instead of passing a generic combination of flags as argument, it is easier to pass the entire tt_common structure (containing the flags already set) plus a bitfield of event flags that will be unified with the already existing ones before inserting the client in the event queue. In this way invocations of the modified function can be simplified. 
Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/translation-table.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 9e874857584..d35b73904e0 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -163,10 +163,19 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } +/** + * batadv_tt_local_event - store a local TT event (ADD/DEL) + * @bat_priv: the bat priv with all the soft interface information + * @tt_local_entry: the TT entry involved in the event + * @event_flags: flags to store in the event structure + */ static void batadv_tt_local_event(struct batadv_priv *bat_priv, - const uint8_t *addr, uint8_t flags) + struct batadv_tt_local_entry *tt_local_entry, + uint8_t event_flags) { struct batadv_tt_change_node *tt_change_node, *entry, *safe; + struct batadv_tt_common_entry *common = &tt_local_entry->common; + uint8_t flags = common->flags | event_flags; bool event_removed = false; bool del_op_requested, del_op_entry; @@ -176,7 +185,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - memcpy(tt_change_node->change.addr, addr, ETH_ALEN); + memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); del_op_requested = flags & BATADV_TT_CLIENT_DEL; @@ -184,7 +193,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tt.changes_list_lock); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { - if (!batadv_compare_eth(entry->change.addr, addr)) + if (!batadv_compare_eth(entry->change.addr, common->addr)) continue; /* DEL+ADD in the same orig interval have no effect and can be @@ -332,7 +341,7 @@ void batadv_tt_local_add(struct net_device 
*soft_iface, const uint8_t *addr, } add_event: - batadv_tt_local_event(bat_priv, addr, tt_local->common.flags); + batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS); check_roaming: /* Check whether it is a roaming, but don't do anything if the roaming @@ -529,8 +538,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, uint16_t flags, const char *message) { - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - tt_local_entry->common.flags | flags); + batadv_tt_local_event(bat_priv, tt_local_entry, flags); /* The local client has to be marked as "pending to be removed" but has * to be kept in the table in order to send it in a full table @@ -584,8 +592,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, /* if this client has been added right now, it is possible to * immediately purge it */ - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - curr_flags | BATADV_TT_CLIENT_DEL); + batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); hlist_del_rcu(&tt_local_entry->common.hash_entry); batadv_tt_local_entry_free_ref(tt_local_entry); -- cgit v1.2.3 From 41ab6c4891ed4cdd855ae569924acb1da424a614 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 2 Apr 2013 22:28:44 +0200 Subject: batman-adv: don't deal with NET_IP_ALIGN manually Instead of dealing with NET_IP_ALIGN during allocation and headroom reservation, it is possible to use netdev_alloc_skb_ip_align(), which transparently allocates and reserves the correct amount of data. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bat_iv_ogm.c | 6 +++--- net/batman-adv/icmp_socket.c | 4 ++-- net/batman-adv/translation-table.c | 20 ++++++++++---------- net/batman-adv/vis.c | 12 ++++++------ 4 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 42b7a94d61b..5b0a043c47c 100644 ---
a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -453,16 +453,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, else skb_size = packet_len; - skb_size += ETH_HLEN + NET_IP_ALIGN; + skb_size += ETH_HLEN; - forw_packet_aggr->skb = dev_alloc_skb(skb_size); + forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!forw_packet_aggr->skb) { if (!own_packet) atomic_inc(&bat_priv->batman_queue_left); kfree(forw_packet_aggr); goto out; } - skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(forw_packet_aggr->skb, ETH_HLEN); INIT_HLIST_NODE(&forw_packet_aggr->list); diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 0ba6c899b2d..b27508b8085 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -177,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (len >= sizeof(struct batadv_icmp_packet_rr)) packet_len = sizeof(struct batadv_icmp_packet_rr); - skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN); if (!skb) { len = -ENOMEM; goto out; } - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); if (copy_from_user(icmp_packet, buff, packet_len)) { diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d35b73904e0..52808c4ae08 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1607,11 +1607,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = tt_query_size + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_response = (struct 
batadv_tt_query_packet *)skb_put(skb, len); tt_response->ttvn = ttvn; @@ -1672,11 +1672,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_req_len = sizeof(*tt_request); tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); @@ -1769,11 +1769,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1885,11 +1885,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -2219,11 +2219,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; - skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); diff 
--git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 1625e5793a8..94eaeb521c1 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -392,12 +392,12 @@ batadv_add_packet(struct batadv_priv *bat_priv, return NULL; len = sizeof(*packet) + vis_info_len; - info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!info->skb_packet) { kfree(info); return NULL; } - skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(info->skb_packet, ETH_HLEN); packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); kref_init(&info->refcount); @@ -854,13 +854,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv) if (!bat_priv->vis.my_info) goto err; - len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE; - len += ETH_HLEN + NET_IP_ALIGN; - bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); + len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; + bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL, + len); if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); -- cgit v1.2.3 From d4ff40f683221d46c351cd9ab61f37a6ea5d2444 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 18 Apr 2013 15:13:01 +0200 Subject: batman-adv: pass a 16bit long flag argument to tt_global_add() it may be the case that we want to store some local TT client flags in a global entry, therefore the tt_global_add needs to get a proper argument for this Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/translation-table.c | 19 +++++++++++++++++-- net/batman-adv/translation-table.h | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'net') diff --git 
a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 52808c4ae08..e272f68e1b0 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -798,10 +798,25 @@ out: batadv_tt_orig_list_entry_free_ref(orig_entry); } -/* caller must hold orig_node refcount */ +/** + * batadv_tt_global_add - add a new TT global entry or update an existing one + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the originator announcing the client + * @tt_addr: the mac address of the non-mesh client + * @flags: TT flags that have to be set for this non-mesh client + * @ttvn: the tt version number ever announcing this non-mesh client + * + * Add a new TT global entry for the given originator. If the entry already + * exists add a new reference to the given originator (a global entry can have + * references to multiple originators) and adjust the flags attribute to reflect + * the function argument. + * If a TT local entry exists for this non-mesh client remove it. + * + * The caller must hold orig_node refcount. 
+ */ int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *tt_addr, uint8_t flags, + const unsigned char *tt_addr, uint16_t flags, uint8_t ttvn) { struct batadv_tt_global_entry *tt_global_entry; diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index ab8e683b402..659a3bb759c 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -33,7 +33,7 @@ void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, const unsigned char *tt_buff, int tt_buff_len); int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *addr, uint8_t flags, + const unsigned char *addr, uint16_t flags, uint8_t ttvn); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, -- cgit v1.2.3 From e54c77f08ec62434ac8b24e402aa7b787cf42198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 19 Apr 2013 12:06:56 +0200 Subject: batman-adv: Remove unnecessary INIT_HLIST_NODE() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no need for an explicit hlist_node initialization if the node is added to a list right away, as is the case with the hlist_add_head()s here.
Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 2 -- net/batman-adv/send.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 5b0a043c47c..d07323b3e9b 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -464,8 +464,6 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, } skb_reserve(forw_packet_aggr->skb, ETH_HLEN); - INIT_HLIST_NODE(&forw_packet_aggr->list); - skb_buff = skb_put(forw_packet_aggr->skb, packet_len); forw_packet_aggr->packet_len = packet_len; memcpy(skb_buff, packet_buff, packet_len); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index ed7072ad84e..ce69f458a75 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -152,8 +152,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time) { - INIT_HLIST_NODE(&forw_packet->list); - /* add new packet to packet list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list); -- cgit v1.2.3 From aa27c31265f111ff73d948a5846a3f193376491e Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 18 Apr 2013 04:56:03 +0800 Subject: batman-adv: do not print orig nodes without nc neighbors on nc table print Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/network-coding.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 0787a34609b..22cd51fea73 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1763,6 +1763,13 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) /* For each orig_node in this bin */ rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + /* 
no need to print the orig node if it does not have + * network coding neighbors + */ + if (list_empty(&orig_node->in_coding_list) && + list_empty(&orig_node->out_coding_list)) + continue; + seq_printf(seq, "Node: %pM\n", orig_node->orig); seq_puts(seq, " Ingoing: "); -- cgit v1.2.3 From eb2deb6b39b1597577c1635e9ebf319f1ae02213 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 19 Apr 2013 18:07:00 +0200 Subject: batman-adv: change VID semantic in the BLA code In order to make batman-adv fully vlan aware later, the semantic used for variables storing the VLAN ID values has to be changed in order to be adapted to the new one which will be used batman-adv wide. In particular, the VID has to be an "_unsigned_ short int" and its 4 MSB will be used as a flag bitfield, while the remaining 12 bits are used to store the real VID value Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner Acked-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 51 ++++++++++++++++++---------------- net/batman-adv/bridge_loop_avoidance.h | 12 ++++---- net/batman-adv/main.h | 11 ++++++++ net/batman-adv/soft-interface.c | 4 +-- net/batman-adv/types.h | 4 +-- 5 files changed, 49 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 082189e2e40..7354063567b 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -180,7 +180,7 @@ static struct batadv_bla_claim */ static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, - uint8_t *addr, short vid) + uint8_t *addr, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; @@ -257,7 +257,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) 
*/ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, - short vid, int claimtype) + unsigned short vid, int claimtype) { struct sk_buff *skb; struct ethhdr *ethhdr; @@ -335,13 +335,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); memcpy(ethhdr->h_dest, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n", + "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, vid); break; } - if (vid != -1) - skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid); + if (vid & BATADV_VLAN_HAS_TAG) + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); @@ -367,7 +368,7 @@ out: */ static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, - short vid, bool own_backbone) + unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; struct batadv_orig_node *orig_node; @@ -434,7 +435,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, static void batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -456,7 +457,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, */ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct hlist_head *head; struct batadv_hashtable *hash; @@ -547,7 +548,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, * @backbone_gw: the backbone gateway which claims it */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid, + const uint8_t *mac, const unsigned short vid, struct batadv_bla_backbone_gw 
*backbone_gw) { struct batadv_bla_claim *claim; @@ -611,7 +612,7 @@ claim_free_ref: * given mac address and vid. */ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid) + const uint8_t *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; @@ -637,7 +638,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, /* check for ANNOUNCE frame, return 1 if handled */ static int batadv_handle_announce(struct batadv_priv *bat_priv, uint8_t *an_addr, uint8_t *backbone_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; uint16_t crc; @@ -685,7 +686,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, static int batadv_handle_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - struct ethhdr *ethhdr, short vid) + struct ethhdr *ethhdr, unsigned short vid) { /* check for REQUEST frame */ if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest)) @@ -709,7 +710,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, static int batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - uint8_t *claim_addr, short vid) + uint8_t *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -738,7 +739,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, static int batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, uint8_t *claim_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -861,7 +862,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_bla_claim_dst *bla_dst; uint16_t proto; int headlen; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; int ret; ethhdr = eth_hdr(skb); @@ -869,6 +870,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, if 
(ntohs(ethhdr->h_proto) == ETH_P_8021Q) { vhdr = (struct vlan_ethhdr *)ethhdr; vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; proto = ntohs(vhdr->h_vlan_encapsulated_proto); headlen = sizeof(*vhdr); } else { @@ -1358,7 +1360,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_bla_backbone_gw *backbone_gw; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) return 0; @@ -1375,6 +1377,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; } /* see if this originator is a backbone gw for this VLAN */ @@ -1424,8 +1427,8 @@ void batadv_bla_free(struct batadv_priv *bat_priv) * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast) +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; @@ -1519,7 +1522,8 @@ out: * returns 1, otherwise it returns 0 and the caller shall further * process the skb. 
*/ -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; @@ -1623,7 +1627,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); - seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", + seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", claim->addr, claim->vid, claim->backbone_gw->orig, (is_own ? 'x' : ' '), @@ -1676,10 +1680,9 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) if (is_own) continue; - seq_printf(seq, - " * %pM on % 5d % 4i.%03is (%#.4x)\n", - backbone_gw->orig, backbone_gw->vid, - secs, msecs, backbone_gw->crc); + seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", + backbone_gw->orig, backbone_gw->vid, secs, + msecs, backbone_gw->crc); } rcu_read_unlock(); } diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index dea2fbc5d98..4b102e71e5b 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -21,9 +21,10 @@ #define _NET_BATMAN_ADV_BLA_H_ #ifdef CONFIG_BATMAN_ADV_BLA -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast); -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid); +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast); +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid); int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); @@ -42,13 +43,14 @@ void batadv_bla_free(struct batadv_priv 
*bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int batadv_bla_rx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid, bool is_bcast) + struct sk_buff *skb, unsigned short vid, + bool is_bcast) { return 0; } static inline int batadv_bla_tx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid) + struct sk_buff *skb, unsigned short vid) { return 0; } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ea1a3bafe9c..6f25ef29f30 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -162,6 +162,17 @@ enum batadv_uev_type { #include #include "types.h" +/** + * batadv_vlan_flags - flags for the four MSB of any vlan ID field + * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not + */ +enum batadv_vlan_flags { + BATADV_VLAN_HAS_TAG = BIT(15), +}; + +#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \ + (int)(vid & VLAN_VID_MASK) : -1) + extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b26a6cdb934..700d0b49742 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -154,7 +154,7 @@ static int batadv_interface_tx(struct sk_buff *skb, 0x00, 0x00}; unsigned int header_len = 0; int data_len = skb->len, ret; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; bool do_bcast = false; uint32_t seqno; unsigned long brd_delay = 1; @@ -303,7 +303,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_header *batadv_header = (struct batadv_header *)skb->data; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; __be16 ethertype = __constant_htons(ETH_P_BATMAN); bool is_bcast; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 5f542bdd9a4..b2c94e13931 100644 --- a/net/batman-adv/types.h +++ 
b/net/batman-adv/types.h @@ -642,7 +642,7 @@ struct batadv_socket_packet { #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bla_backbone_gw { uint8_t orig[ETH_ALEN]; - short vid; + unsigned short vid; struct hlist_node hash_entry; struct batadv_priv *bat_priv; unsigned long lasttime; @@ -665,7 +665,7 @@ struct batadv_bla_backbone_gw { */ struct batadv_bla_claim { uint8_t addr[ETH_ALEN]; - short vid; + unsigned short vid; struct batadv_bla_backbone_gw *backbone_gw; unsigned long lasttime; struct hlist_node hash_entry; -- cgit v1.2.3 From 5f80df6705fcd8153f93bd0e82109dbeb7ff535b Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 19 Apr 2013 18:07:01 +0200 Subject: batman-adv: print the VID properly Since the MSB bits of any vid variable are now used for storing flags, print the vid properly by taking the flags away and printing -1 in case of VID representing no real VLAN. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/bridge_loop_avoidance.c | 37 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 7354063567b..e9d8e0b3c3d 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -307,7 +307,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, */ memcpy(ethhdr->h_source, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid); + "bla_send_claim(): CLAIM %pM on vid %d\n", mac, + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame @@ -316,7 +317,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, - vid); + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: /* announcement frame 
@@ -325,7 +326,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", - ethhdr->h_source, vid); + ethhdr->h_source, BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame @@ -336,7 +337,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(ethhdr->h_dest, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", - ethhdr->h_source, ethhdr->h_dest, vid); + ethhdr->h_source, ethhdr->h_dest, + BATADV_PRINT_VID(vid)); break; } @@ -381,7 +383,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", - orig, vid); + orig, BATADV_PRINT_VID(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -573,7 +575,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, atomic_set(&claim->refcount, 2); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, @@ -592,7 +594,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); @@ -623,7 +625,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, return; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); @@ -659,12 +661,13 
@@ static int batadv_handle_announce(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - vid, backbone_gw->orig, crc); + BATADV_PRINT_VID(vid), backbone_gw->orig, crc); if (backbone_gw->crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", - backbone_gw->orig, backbone_gw->vid, + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), backbone_gw->crc, crc); batadv_bla_send_request(backbone_gw); @@ -700,7 +703,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", - vid, ethhdr->h_source); + BATADV_PRINT_VID(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return 1; @@ -728,7 +731,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", - claim_addr, vid, backbone_gw->orig); + claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_free_ref(backbone_gw); @@ -912,7 +915,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, + hw_dst); if (ret < 2) return ret; @@ -947,7 +951,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. 
eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); return 1; } @@ -1628,7 +1632,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", - claim->addr, claim->vid, + claim->addr, BATADV_PRINT_VID(claim->vid), claim->backbone_gw->orig, (is_own ? 'x' : ' '), claim->backbone_gw->crc); @@ -1681,7 +1685,8 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) continue; seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", - backbone_gw->orig, backbone_gw->vid, secs, + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), secs, msecs, backbone_gw->crc); } rcu_read_unlock(); -- cgit v1.2.3 From e91ecfc64ad691176be119e627e36cec8564f44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Sat, 20 Apr 2013 13:54:39 +0200 Subject: batman-adv: Move call to batadv_nc_skb_forward() from routing.c to send.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to batadv_nc_skb_forward() fits better in batadv_send_skb_to_orig(), as this is where the actual next hop is looked up. To let the caller of batadv_send_skb_to_orig() know whether the skb is transmitted, buffered or failed, the return value is changed from boolean to int. 
Signed-off-by: Martin Hundebøll Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/network-coding.c | 5 ++--- net/batman-adv/network-coding.h | 6 ++---- net/batman-adv/routing.c | 23 +++++++++++++---------- net/batman-adv/send.c | 27 +++++++++++++++++++-------- net/batman-adv/send.h | 6 +++--- net/batman-adv/translation-table.c | 12 +++++++----- net/batman-adv/unicast.c | 2 +- net/batman-adv/vis.c | 7 ++++--- 8 files changed, 51 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 22cd51fea73..a487d46e0ae 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1359,18 +1359,17 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, * buffer * @skb: data skb to forward * @neigh_node: next hop to forward packet to - * @ethhdr: pointer to the ethernet header inside the skb * * Returns true if the skb was consumed (encoded packet sent) or false otherwise */ bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { const struct net_device *netdev = neigh_node->if_incoming->soft_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 4fa6d0caddb..85a4ec81ad5 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -36,8 +36,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); void batadv_nc_init_orig(struct batadv_orig_node *orig_node); bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr); + struct batadv_neigh_node *neigh_node); void 
batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb); void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, @@ -87,8 +86,7 @@ static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } static inline bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { return false; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index fad08469767..2f0bd3ffe6e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -285,7 +285,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_ECHO_REPLY; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -333,7 +333,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_TTL_EXCEEDED; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -410,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmp_packet->header.ttl--; /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) + if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -775,7 +775,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_neigh_node *neigh_node = NULL; struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr = eth_hdr(skb); - int ret = NET_RX_DROP; + int res, ret = NET_RX_DROP; struct sk_buff *new_skb; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -835,16 +835,19 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet->header.ttl--; - /* 
network code packet if possible */ - if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { - ret = NET_RX_SUCCESS; - } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { - ret = NET_RX_SUCCESS; + res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - /* Update stats counter */ + /* translate transmit result into receive result */ + if (res == NET_XMIT_SUCCESS) { + /* skb was transmitted and consumed */ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, skb->len + ETH_HLEN); + + ret = NET_RX_SUCCESS; + } else if (res == NET_XMIT_POLICED) { + /* skb was buffered and consumed */ + ret = NET_RX_SUCCESS; } out: diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index ce69f458a75..e9ff8d80120 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -96,26 +96,37 @@ send_skb_err: * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Returns TRUE on success; FALSE otherwise. + * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or + * NET_XMIT_POLICED if the skb is buffered for later transmit. */ -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if) +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; + int ret = NET_XMIT_DROP; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); if (!neigh_node) - return false; + return ret; - /* route it */ - batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + /* try to network code the packet, if it is received on an interface + * (i.e. being forwarded). If the packet originates from this node or if + * network coding fails, then send the packet as usual. 
+ */ + if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { + ret = NET_XMIT_POLICED; + } else { + batadv_send_skb_packet(skb, neigh_node->if_incoming, + neigh_node->addr); + ret = NET_XMIT_SUCCESS; + } batadv_neigh_node_free_ref(neigh_node); - return true; + return ret; } void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 38e662f619a..e7b17880fca 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -23,9 +23,9 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const uint8_t *dst_addr); -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if); +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if); void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e272f68e1b0..429aeef3d8b 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1713,7 +1713,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); - if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: @@ -1737,7 +1737,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; uint8_t orig_ttvn, req_ttvn, ttvn; - int ret = false; + int res, ret = false; unsigned char *tt_buff; bool full_table; uint16_t tt_len, tt_tot; @@ -1832,8 +1832,10 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if 
(batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL); + if (res != NET_XMIT_DROP) ret = true; + goto out; unlock: @@ -1947,7 +1949,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = true; goto out; @@ -2260,7 +2262,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 0bb3b5982f9..dc8b5d4dd63 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -464,7 +464,7 @@ find_router: goto out; } - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 94eaeb521c1..4983340f194 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -697,7 +697,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; struct sk_buff *skb; - uint32_t i; + uint32_t i, res; packet = (struct batadv_vis_packet *)info->skb_packet->data; @@ -724,7 +724,8 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, if (!skb) continue; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res == NET_XMIT_DROP) kfree_skb(skb); } rcu_read_unlock(); @@ -748,7 +749,7 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv, if (!skb) goto out; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + if 
(batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) kfree_skb(skb); out: -- cgit v1.2.3 From 6715fd3f0538e805b6a769d66823ec16b8b647ac Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sat, 20 Apr 2013 17:15:09 +0200 Subject: batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich Signed-off-by: Antonio Quartulli --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 6f25ef29f30..5e9aebb7d56 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.2.0" +#define BATADV_SOURCE_VERSION "2013.3.0" #endif /* B.A.T.M.A.N. parameters */ -- cgit v1.2.3 From 75538c2b85cf22eb9af6adfaf26ed7219025adeb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 29 May 2013 11:30:50 +0800 Subject: net: always pass struct netdev_notifier_info to netdevice notifiers commit 351638e7deeed2ec8ce451b53d3 (net: pass info struct via netdevice notifier) breaks booting of my KVM guest, this is due to we still forget to pass struct netdev_notifier_info in several places. This patch completes it. Cc: Jiri Pirko Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/atm/clip.c | 4 +++- net/core/dev.c | 6 ------ net/ipv4/netfilter/ipt_MASQUERADE.c | 5 ++++- net/ipv6/addrconf.c | 7 +++++-- net/ipv6/netfilter/ip6t_MASQUERADE.c | 4 +++- 5 files changed, 15 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index cce241eb01d..8215f7cb170 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -575,6 +575,7 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, void *ifa) { struct in_device *in_dev; + struct netdev_notifier_info info; in_dev = ((struct in_ifaddr *)ifa)->ifa_dev; /* @@ -583,7 +584,8 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, */ if (event != NETDEV_UP) return NOTIFY_DONE; - return clip_device_event(this, NETDEV_CHANGE, in_dev->dev); + netdev_notifier_info_init(&info, in_dev->dev); + return clip_device_event(this, NETDEV_CHANGE, &info); } static struct notifier_block clip_dev_notifier = { diff --git a/net/core/dev.c b/net/core/dev.c index 6eb621cc3b8..b2e9057be3b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1391,12 +1391,6 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); -static void netdev_notifier_info_init(struct netdev_notifier_info *info, - struct net_device *dev) -{ - info->dev = dev; -} - static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index dd5508bde79..30e4de94056 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -129,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this, void *ptr) { struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - return masq_device_event(this, event, dev); + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return masq_device_event(this, event, &info); } static struct notifier_block 
masq_dev_notifier = { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bce073b4bbd..7b34f06af34 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4645,13 +4645,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, static void dev_disable_change(struct inet6_dev *idev) { + struct netdev_notifier_info info; + if (!idev || !idev->dev) return; + netdev_notifier_info_init(&info, idev->dev); if (idev->cnf.disable_ipv6) - addrconf_notify(NULL, NETDEV_DOWN, idev->dev); + addrconf_notify(NULL, NETDEV_DOWN, &info); else - addrconf_notify(NULL, NETDEV_UP, idev->dev); + addrconf_notify(NULL, NETDEV_UP, &info); } static void addrconf_disable_change(struct net *net, __s32 newf) diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index b76257cd7e1..47bff610751 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -89,8 +89,10 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; + struct netdev_notifier_info info; - return masq_device_event(this, event, ifa->idev->dev); + netdev_notifier_info_init(&info, ifa->idev->dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_inet_notifier = { -- cgit v1.2.3 From ced14f6804a979d1972415bc23f2f8ddb18595dd Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:25 +0000 Subject: net: Correct comparisons and calculations using skb->tail and skb->transport_header This corrects a regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer whereas skb->transport_header will be an offset from head. This is corrected by using wrappers that ensure that comparisons and calculations are always made using pointers. Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b2e9057be3b..d4d874a25e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1724,7 +1724,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb2); if (skb_network_header(skb2) < skb2->data || - skb2->network_header > skb2->tail) { + skb_network_header(skb2) > skb_tail_pointer(skb2)) { net_crit_ratelimited("protocol %04x is buggy, dev %s\n", ntohs(skb2->protocol), dev->name); @@ -3892,7 +3892,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - if (skb->mac_header == skb->tail && + if (skb_mac_header(skb) == skb_tail_pointer(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); -- cgit v1.2.3 From 29a3cad5c6ae9e7fbf1509d01d39c3c3c38f11f9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:26 +0000 Subject: ipv6: Correct comparisons and calculations using skb->tail and skb->transport_header This corrects a regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer whereas skb->transport_header will be an offset from head. This is corrected by using wrappers that ensure that comparisons and calculations are always made using pointers. Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- net/ipv6/exthdrs_core.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/mcast.c | 5 +++-- net/ipv6/mip6.c | 6 ++++-- net/ipv6/ndisc.c | 9 +++++---- net/ipv6/output_core.c | 3 ++- net/ipv6/raw.c | 3 ++- net/ipv6/route.c | 2 +- 8 files changed, 19 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index c5e83fae4df..140748debc4 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL(ipv6_skip_exthdr); int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) { const unsigned char *nh = skb_network_header(skb); - int packet_len = skb->tail - skb->network_header; + int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); struct ipv6_opt_hdr *hdr; int len; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1d2902e6178..4b4890bbe16 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -399,7 +399,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int err = 0; if ((u8 *)hdr < skb->head || - (skb->network_header + sizeof(*hdr)) > skb->tail) + (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; /* diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bfa6cc36ef2..72c8bfe06bb 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1409,8 +1409,9 @@ static void mld_sendpack(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); - mldlen = skb->tail - skb->transport_header; + payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - + sizeof(*pip6); + mldlen = skb_tail_pointer(skb) - skb_transport_header(skb); pip6->payload_len = htons(payload_len); pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 0f9bdc5ee9f..9ac01dc9402 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -268,7 +268,8 @@ static int 
mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; @@ -404,7 +405,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a0962697a25..781dd3c9968 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -693,7 +693,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -853,7 +853,7 @@ static void ndisc_recv_na(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -1069,7 +1069,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) __u8 * opt = (__u8 *)(ra_msg + 1); - optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); + optlen = (skb_tail_pointer(skb) - 
skb_transport_header(skb)) - + sizeof(struct ra_msg); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); @@ -1346,7 +1347,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) u8 *hdr; struct ndisc_options ndopts; struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); #ifdef CONFIG_IPV6_NDISC_NODETYPE diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index c2e73e647e4..ab92a3673fb 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -40,7 +40,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) u16 offset = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index eedff8ccded..4f8886aa842 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1132,7 +1132,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) - amount = skb->tail - skb->transport_header; + amount = skb_tail_pointer(skb) - + skb_transport_header(skb); spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 194c3cde153..2b874185ebb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1649,7 +1649,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu int optlen, on_link; u8 *lladdr; - optlen = skb->tail - skb->transport_header; + optlen = skb_tail_pointer(skb) - skb_transport_header(skb); optlen -= 
sizeof(*msg); if (optlen < 0) { -- cgit v1.2.3 From f7c0c2ae843b74f8dba55820cb0a3de19c976703 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:27 +0000 Subject: ipv4: Correct comparisons and calculations using skb->tail and skb->transport_header This corrects a regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer whereas skb->transport_header will be an offset from head. This is corrected by using wrappers that ensure that comparisons and calculations are always made using pointers. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 3 ++- net/ipv4/igmp.c | 2 +- net/ipv4/tcp.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 562efd91f45..5d0d379b015 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -503,7 +503,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph = ip_hdr(skb_in); if ((u8 *)iph < skb_in->head || - (skb_in->network_header + sizeof(*iph)) > skb_in->tail) + (skb_network_header(skb_in) + sizeof(*iph)) > + skb_tail_pointer(skb_in)) goto out; /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d8c232794bc..450f625361e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -363,7 +363,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) static int igmpv3_sendpack(struct sk_buff *skb) { struct igmphdr *pig = igmp_hdr(skb); - const int igmplen = skb->tail - skb->transport_header; + const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb); pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ba4186e1dca..1f58594d5a8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2989,7 +2989,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, swap(gso_skb->truesize, skb->truesize); } - delta = 
htonl(oldlen + (skb->tail - skb->transport_header) + + delta = htonl(oldlen + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + skb->data_len); th->check = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); -- cgit v1.2.3 From 158874cac61245b84e939c92c53db7000122b7b0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:28 +0000 Subject: sctp: Correct access to skb->{network, transport}_header This corrects a regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case sk_buff_data_t will be a pointer; however, skb->{network,transport}_header is now __u16. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- net/sctp/ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index 4b2c83146aa..e328fe8f93f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct sctp_association *asoc = NULL; struct sctp_transport *transport; struct inet_sock *inet; - sk_buff_data_t saveip, savesctp; + __be16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 391a245d520..8ee553b499c 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -153,7 +153,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sctp_association *asoc; struct sctp_transport *transport; struct ipv6_pinfo *np; - sk_buff_data_t saveip, savesctp; + __be16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); -- cgit v1.2.3 From 7cc461900549fc480eb133948649a1edb7eaaa6f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 28 May 2013 20:34:29 +0000 Subject: net, ipv4, ipv6: Correct assignment of skb->network_header to skb->tail This corrects a regression introduced by "net: Use 16bits for *_headers 
fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer; however, skb->network_header is now an offset. This patch corrects the problem by adding a wrapper to return skb tail as an offset regardless of the value of NET_SKBUFF_DATA_USES_OFFSET. It seems that this offset may be more than 64k, and some care has been taken to treat such cases as an error. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 9 ++++++++- net/core/pktgen.c | 16 ++++++++++++++-- net/ipv4/ipmr.c | 8 +++++++- 3 files changed, 29 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 37deedd48bc..688517c7ff1 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -676,6 +676,8 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo spin_lock_irqsave(&npinfo->rx_lock, flags); list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + unsigned long tail_offset; + if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) continue; @@ -700,7 +702,12 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo hdr->saddr = *saddr; hdr->daddr = *daddr; - send_skb->transport_header = send_skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(send_skb); + continue; + } + skb_set_network_header(send_skb, tail_offset); skb_put(send_skb, size); icmp6h = (struct icmp6hdr *)skb_transport_header(skb); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 795498fd458..d2ede89662b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2642,6 +2642,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ u16 queue_map; + unsigned long tail_offset; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ 
-2708,7 +2709,12 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IP); } - skb->network_header = skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(skb); + return NULL; + } + skb_set_network_header(skb, tail_offset); skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); @@ -2775,6 +2781,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ u16 queue_map; + unsigned long tail_offset; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2822,7 +2829,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - skb->network_header = skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(skb); + return NULL; + } + skb_set_network_header(skb, tail_offset); skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index f975399f352..df97f0ac1a1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -945,6 +945,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct igmpmsg *msg; struct sock *mroute_sk; int ret; + unsigned long tail_offset; #ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) @@ -980,7 +981,12 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Copy the IP header */ - skb->network_header = skb->tail; + tail_offset = skb_tail_offset(skb); + if (tail_offset > 0xffff) { + kfree_skb(skb); + return -EINVAL; + } + skb_set_network_header(skb, tail_offset); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, 
ihl); ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ -- cgit v1.2.3 From e057d3c31bdf87616b415c4b2cbf7310f54b9219 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 28 May 2013 13:01:52 +0200 Subject: cfg80211: support an active monitor interface flag An active monitor interface is one that is used for communication (via injection). It is expected to ACK incoming unicast packets. This is useful for running various 802.11 testing utilities that associate to an AP via injection and manage the state in user space. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fb6abcb359a..31d265f36d2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2227,6 +2227,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { [NL80211_MNTR_FLAG_CONTROL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, + [NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG }, }; static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) @@ -2338,6 +2339,10 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } + if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + if (change) err = cfg80211_change_iface(rdev, dev, ntype, flags, &params); else @@ -2395,6 +2400,11 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? 
info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); + + if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, &params); -- cgit v1.2.3 From 31eba5bc56a9324f056d28569a4f89f39c1c3f70 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 28 May 2013 13:01:53 +0200 Subject: mac80211: support active monitor interfaces Support them only if the driver advertises support for them via IEEE80211_HW_SUPPORTS_ACTIVE_MONITOR. Unlike normal monitor interfaces, they are added to the driver, along with their MAC address. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 11 +++++++---- net/mac80211/driver-ops.h | 3 ++- net/mac80211/iface.c | 29 +++++++++++++++++++++++------ net/mac80211/util.c | 6 ++++++ 4 files changed, 38 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 232edf78d5a..9034da16cf1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -73,16 +73,19 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; if (ieee80211_sdata_running(sdata)) { + u32 mask = MONITOR_FLAG_COOK_FRAMES | + MONITOR_FLAG_ACTIVE; + /* - * Prohibit MONITOR_FLAG_COOK_FRAMES to be - * changed while the interface is up. + * Prohibit MONITOR_FLAG_COOK_FRAMES and + * MONITOR_FLAG_ACTIVE to be changed while the + * interface is up. 
* Else we would need to add a lot of cruft * to update everything: * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ - if ((*flags & MONITOR_FLAG_COOK_FRAMES) != - (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) + if ((*flags & mask) != (sdata->u.mntr_flags & mask)) return -EBUSY; ieee80211_adjust_monitor_flags(sdata, -1); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 169664c122e..b931c96a596 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -146,7 +146,8 @@ static inline int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)))) + !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF) && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; trace_drv_add_interface(local, sdata); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ceef64426a8..7cabaf261fe 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -159,7 +159,8 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr) +static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr, + bool check_dup) { struct ieee80211_sub_if_data *sdata; u64 new, mask, tmp; @@ -179,10 +180,13 @@ static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr) ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); + if (!check_dup) + return ret; mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) continue; m = sdata->vif.addr; @@ -204,12 +208,17 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) { struct ieee80211_sub_if_data 
*sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sockaddr *sa = addr; + bool check_dup = true; int ret; if (ieee80211_sdata_running(sdata)) return -EBUSY; - ret = ieee80211_verify_mac(sdata->local, sa->sa_data); + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + check_dup = false; + + ret = ieee80211_verify_mac(sdata->local, sa->sa_data, check_dup); if (ret) return ret; @@ -541,7 +550,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; } - if (local->monitors == 0 && local->open_count == 0) { + if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + res = drv_add_interface(local, sdata); + if (res) + goto err_stop; + } else if (local->monitors == 0 && local->open_count == 0) { res = ieee80211_add_virtual_monitor(local); if (res) goto err_stop; @@ -919,7 +932,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->mtx); ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - break; + + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + break; + + /* fall through */ default: if (going_down) drv_remove_interface(local, sdata); @@ -1068,7 +1085,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, }; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2a8d759324c..89a83770d15 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -560,6 +560,9 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + continue; + break; case NL80211_IFTYPE_AP_VLAN: continue; default: @@ -598,6 +601,9 @@ void 
ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + continue; + break; case NL80211_IFTYPE_AP_VLAN: continue; default: -- cgit v1.2.3 From f4d57941bf89997bad3294f94987caebf2771a33 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 28 May 2013 17:24:15 +0200 Subject: mac80211: always send multicast on CAB queue If the driver advertised support for a CAB queue, then we should put all multicast frames there, otherwise sending them can be racy with clients going to sleep while we TX a frame. To avoid this, always TX multicast frames on the multicast queue. It seems like even drivers not using the queue framework might want to do this which would mean also moving the IEEE80211_TX_CTL_SEND_AFTER_DTIM flag assignment, but it also seems that drivers behave differently here so that just moving it wouldn't be a good idea. It'd be better to modify those drivers to use the queue framework. 
Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9972e07a2f9..34be9336b5d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -398,13 +398,14 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_has_order(hdr->frame_control)) return TX_CONTINUE; + if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = tx->sdata->vif.cab_queue; + /* no stations in PS mode */ if (!atomic_read(&ps->num_sta_ps)) return TX_CONTINUE; info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; - if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) - info->hw_queue = tx->sdata->vif.cab_queue; /* device releases frame after DTIM beacon */ if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) -- cgit v1.2.3 From 6804973ffb4288bba14d53223e2fbb2bbd1d2e1b Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 29 May 2013 14:20:11 +0000 Subject: tcp: consolidate PRR packet accounting This patch series fixes an undo bug in fast recovery: the sender mistakenly undos the cwnd too early but continues fast retransmits until all pending data are acked. This also multiplies the SNMP stat PARTIALUNDO events by the degree of the network reordering. The first patch prepares the fix by consolidating the accounting of newly_acked_sacked in tcp_cwnd_reduction(), instead of updating newly_acked_sacked everytime sacked_out is adjusted. Also pass acked and prior_unsacked as const type because they are readonly in the rest of recovery processing. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9579e1a5a14..86b5fa72ff9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2430,12 +2430,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) TCP_ECN_queue_cwr(tp); } -static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, +static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + int newly_acked_sacked = prior_unsacked - + (tp->packets_out - tp->sacked_out); tp->prr_delivered += newly_acked_sacked; if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { @@ -2492,7 +2494,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) +static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2509,7 +2511,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) tcp_moderate_cwnd(tp); } else { - tcp_cwnd_reduction(sk, newly_acked_sacked, 0); + tcp_cwnd_reduction(sk, prior_unsacked, 0); } } @@ -2678,15 +2680,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). 
*/ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int prior_sacked, int prior_packets, +static void tcp_fastretrans_alert(struct sock *sk, const int acked, + const int prior_unsacked, bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2739,9 +2740,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else - do_lost = tcp_try_undo_partial(sk, pkts_acked); - newly_acked_sacked = prior_packets - tp->packets_out + - tp->sacked_out - prior_sacked; + do_lost = tcp_try_undo_partial(sk, acked); break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); @@ -2755,14 +2754,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } - newly_acked_sacked = prior_packets - tp->packets_out + - tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag, newly_acked_sacked); + tcp_try_to_open(sk, flag, prior_unsacked); return; } @@ -2784,7 +2781,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); + tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); } @@ -3268,9 +3265,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; - int prior_sacked = tp->sacked_out; - int pkts_acked = 0; - int previous_packets_out = 0; + const int prior_unsacked = tp->packets_out - tp->sacked_out; + int acked = 0; /* Number of 
packets newly acked */ /* If the ack is older than previous acks * then we can probably ignore it. @@ -3345,18 +3341,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - previous_packets_out = tp->packets_out; + acked = tp->packets_out; flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); - - pkts_acked = previous_packets_out - tp->packets_out; + acked -= tp->packets_out; if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); } else { if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3378,8 +3373,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. 
@@ -3401,8 +3396,8 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); } SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); -- cgit v1.2.3 From 6a63df46a7363833a0dc0c431027f522b3487972 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 29 May 2013 14:20:12 +0000 Subject: tcp: refactor undo functions Refactor and relocate various functions or variables to prepare the undo fix. Remove some unused function arguments. Rename tcp_undo_cwr to tcp_undo_cwnd_reduction to be consistent with the rest of CWR related function names. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 97 +++++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 86b5fa72ff9..fcb668d1860 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2243,10 +2243,23 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) 
do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) +static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh, + bool unmark_loss) { struct tcp_sock *tp = tcp_sk(sk); + if (unmark_loss) { + struct sk_buff *skb; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; + } + tp->lost_out = 0; + tcp_clear_all_retrans_hints(tp); + } + if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2263,6 +2276,9 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tp->snd_cwnd_stamp = tcp_time_stamp; + + if (undo_ssthresh) + tp->undo_marker = 0; } static inline bool tcp_may_undo(const struct tcp_sock *tp) @@ -2282,14 +2298,13 @@ static bool tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(sk, true); + tcp_undo_cwnd_reduction(sk, true, false); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS_BH(sock_net(sk), mib_idx); - tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2309,8 +2324,7 @@ static void tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwr(sk, true); - tp->undo_marker = 0; + tcp_undo_cwnd_reduction(sk, true, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); } } @@ -2344,60 +2358,20 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } -/* Undo during fast recovery after partial ACK. */ - -static int tcp_try_undo_partial(struct sock *sk, int acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - /* Partial ACK arrived. Force Hoe's retransmit. 
*/ - int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); - - if (tcp_may_undo(tp)) { - /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. - */ - if (!tcp_any_retrans_done(sk)) - tp->retrans_stamp = 0; - - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); - - DBGUNDO(sk, "Hoe"); - tcp_undo_cwr(sk, false); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); - - /* So... Do not make Hoe's retransmit yet. - * If the first packet was delayed, the rest - * ones are most probably delayed as well. - */ - failed = 0; - } - return failed; -} - /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); if (frto_undo || tcp_may_undo(tp)) { - struct sk_buff *skb; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - } - - tcp_clear_all_retrans_hints(tp); + tcp_undo_cwnd_reduction(sk, true, true); DBGUNDO(sk, "partial loss"); - tp->lost_out = 0; - tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - tp->undo_marker = 0; if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; @@ -2669,6 +2643,35 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) tcp_xmit_retransmit_queue(sk); } +/* Undo during fast recovery after partial ACK. */ +static bool tcp_try_undo_partial(struct sock *sk, int acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + /* Partial ACK arrived. Force Hoe's retransmit. */ + bool failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); + + if (tcp_may_undo(tp)) { + /* Plain luck! Hole if filled with delayed + * packet, rather than with a retransmit. 
+ */ + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; + + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + + DBGUNDO(sk, "Hoe"); + tcp_undo_cwnd_reduction(sk, false, false); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); + + /* So... Do not make Hoe's retransmit yet. + * If the first packet was delayed, the rest + * ones are most probably delayed as well. + */ + failed = false; + } + return failed; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2686,7 +2689,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && + bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); int fast_rexmit = 0; -- cgit v1.2.3 From 7026b912f97d912476dff5465ed9a127be094208 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 29 May 2013 14:20:13 +0000 Subject: tcp: fix undo on partial ack in recovery Upon detecting spurious fast retransmit via timestamps during recovery, use PRR to clock out new data packet instead of retransmission. Once all retransmissions are proven spurious, the sender then reverts the cwnd reduction and congestion state to open or disorder. The current code does the opposite: it undoes cwnd as soon as any retransmission is spurious and continues to retransmit until all data are acked. This nullifies the point of undoing the cwnd because the sender is still retransmitting spuriously. This patch fixes it. The undo_ssthresh argument of tcp_undo_cwnd_reduction() is no longer needed and is removed. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 59 +++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fcb668d1860..c35b2275198 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2243,8 +2243,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh, - bool unmark_loss) +static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { struct tcp_sock *tp = tcp_sk(sk); @@ -2268,7 +2267,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh, else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); - if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) { + if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } @@ -2276,9 +2275,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, const bool undo_ssthresh, tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tp->snd_cwnd_stamp = tcp_time_stamp; - - if (undo_ssthresh) - tp->undo_marker = 0; + tp->undo_marker = 0; } static inline bool tcp_may_undo(const struct tcp_sock *tp) @@ -2298,7 +2295,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? 
"loss" : "retrans"); - tcp_undo_cwnd_reduction(sk, true, false); + tcp_undo_cwnd_reduction(sk, false); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else @@ -2324,7 +2321,7 @@ static void tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwnd_reduction(sk, true, false); + tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); } } @@ -2364,7 +2361,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) struct tcp_sock *tp = tcp_sk(sk); if (frto_undo || tcp_may_undo(tp)) { - tcp_undo_cwnd_reduction(sk, true, true); + tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); @@ -2644,32 +2641,37 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) } /* Undo during fast recovery after partial ACK. */ -static bool tcp_try_undo_partial(struct sock *sk, int acked) +static bool tcp_try_undo_partial(struct sock *sk, const int acked, + const int prior_unsacked) { struct tcp_sock *tp = tcp_sk(sk); - /* Partial ACK arrived. Force Hoe's retransmit. */ - bool failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); - if (tcp_may_undo(tp)) { + if (tp->undo_marker && tcp_packet_delayed(tp)) { /* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */ + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + + /* We are getting evidence that the reordering degree is higher + * than we realized. If there are no retransmits out then we + * can undo. Otherwise we clock out new packets but do not + * mark more packets lost or retransmit more. 
+ */ + if (tp->retrans_out) { + tcp_cwnd_reduction(sk, prior_unsacked, 0); + return true; + } + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); - - DBGUNDO(sk, "Hoe"); - tcp_undo_cwnd_reduction(sk, false, false); + DBGUNDO(sk, "partial recovery"); + tcp_undo_cwnd_reduction(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); - - /* So... Do not make Hoe's retransmit yet. - * If the first packet was delayed, the rest - * ones are most probably delayed as well. - */ - failed = false; + tcp_try_keep_open(sk); + return true; } - return failed; + return false; } /* Process an event, which can update packets-in-flight not trivially. @@ -2742,8 +2744,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); - } else - do_lost = tcp_try_undo_partial(sk, acked); + } else { + if (tcp_try_undo_partial(sk, acked, prior_unsacked)) + return; + /* Partial ACK arrived. Force fast retransmit. */ + do_lost = tcp_is_reno(tp) || + tcp_fackets_out(tp) > tp->reordering; + } break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); -- cgit v1.2.3 From c7d9d6a185a7ea383b719b79c428d34ec1470275 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 29 May 2013 14:20:14 +0000 Subject: tcp: undo on DSACK during recovery If the receiver supports DSACK, sender can detect false recoveries and revert cwnd reductions triggered by either severe network reordering or concurrent reordering and loss event. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c35b2275198..907311c9a01 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2315,7 +2315,7 @@ static bool tcp_try_undo_recovery(struct sock *sk) } /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ -static void tcp_try_undo_dsack(struct sock *sk) +static bool tcp_try_undo_dsack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2323,7 +2323,9 @@ static void tcp_try_undo_dsack(struct sock *sk) DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); + return true; } + return false; } /* We can clear retrans_stamp when there are no retransmissions in the @@ -2751,6 +2753,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, do_lost = tcp_is_reno(tp) || tcp_fackets_out(tp) > tp->reordering; } + if (tcp_try_undo_dsack(sk)) { + tcp_try_keep_open(sk); + return; + } break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); -- cgit v1.2.3 From aef6de511a2aea78098e0d507ad7280d38b6b019 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 30 May 2013 20:36:11 +0000 Subject: sctp: Correct byte order of access to skb->{network, transport}_header Corrects a byte order conflict introduced by "sctp: Correct access to skb->{network, transport}_header". All the values in question are in host byte order. Reported-by: Ben Hutchings Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index e328fe8f93f..6533d81a638 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct sctp_association *asoc = NULL; struct sctp_transport *transport; struct inet_sock *inet; - __be16 saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); -- cgit v1.2.3 From c3f1dbaf6e281642848b78fe101764170c15f168 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Fri, 31 May 2013 13:15:38 +0000 Subject: net: Update RFS target at poll for tcp/udp The current state of affairs is that read()/write() will set up RFS (Receive Flow Steering) for internet protocol sockets while poll()/epoll() do not. When poll() gets called with a TCP or UDP socket, we should update the flow target. This permits RFS (if enabled) to select the appropriate CPU for subsequent incoming packets. Note: Only connected UDP sockets can benefit from RFS. Signed-off-by: David Majnemer Signed-off-by: Eric Dumazet Cc: Paul Turner Cc: Tom Herbert Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 2 ++ net/ipv4/udp.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1f58594d5a8..b5d4ad98805 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -436,6 +436,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); + sock_rps_record_flow(sk); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa5eff46d13..c7338ec79cc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1967,6 +1967,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; + sock_rps_record_flow(sk); + /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) -- cgit v1.2.3 From db8caf3dbc77599dc90f4ea0a803cd1d97116f30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 May 2013 11:18:10 +0000 Subject: gro: should aggregate frames without DF GRO on IPv4 doesn't aggregate frames if they don't have DF bit set. Some servers use IP_MTU_DISCOVER/IP_PMTUDISC_PROBE, so linux receivers are unable to aggregate this kind of traffic. The right thing to do is to allow aggregation as long as the DF bit has same value on all segments. bnx2x LRO does this correctly. Signed-off-by: Eric Dumazet Cc: Jerry Chu Cc: Herbert Xu Cc: Ben Hutchings Reviewed-by: Ben Hutchings Signed-off-by: David S. 
Miller --- net/ipv4/af_inet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b05ae96aec4..9c090c7daea 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1385,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out_unlock; id = ntohl(*(__be32 *)&iph->id); - flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1407,6 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | + ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; -- cgit v1.2.3 From 938177e9f3e0238c1712210f7bb6def38a5c8d7f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 30 May 2013 16:39:29 +0000 Subject: netfilter: Correct calculation using skb->tail and skb->network_header This corrects a regression introduced by "net: Use 16bits for *_headers fields of struct skbuff" when NET_SKBUFF_DATA_USES_OFFSET is not set. In that case skb->tail will be a pointer whereas skb->network_header will be an offset from head. This is corrected by using wrappers that ensure that calculations are always made using pointers. Reported-by: Stephen Rothwell Reported-by: Chen Gang Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- net/netfilter/nf_nat_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 5fea563afe3..85e20a91908 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -104,7 +104,7 @@ static void mangle_contents(struct sk_buff *skb, /* move post-replacement */ memmove(data + match_offset + rep_len, data + match_offset + match_len, - skb->tail - (skb->network_header + dataoff + + skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff + match_offset + match_len)); /* insert data from buffer */ -- cgit v1.2.3 From 35d0461061f27eeb62de63174959edbbb9e434de Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 29 May 2013 15:16:05 +0800 Subject: net: clean up skb headers code commit 1a37e412a0225fcba5587 (net: Use 16bits for *_headers fields of struct skbuff) converts skb->*_header to u16, some #if NET_SKBUFF_DATA_USES_OFFSET are now useless, and to be safe, we could just use "X = (typeof(X)) ~0U;" as suggested by David. Cc: David S. Miller Cc: Simon Horman Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f45de077ab9..6b1b52c5593 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -199,9 +199,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) skb->truesize = sizeof(struct sk_buff); atomic_set(&skb->users, 1); -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = (__u16) ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; out: return skb; } @@ -275,10 +273,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = (__u16) ~0U; - skb->transport_header = (__u16) ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -344,10 +340,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = (__u16) ~0U; - skb->transport_header = (__u16) ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); -- cgit v1.2.3 From bf3d6a8f791b2a81279b9ce3201b4970f6fbe51a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 May 2013 23:48:15 +0000 Subject: iptunnel: specify protocol outside IP header Before this patch, ip_tunnel_xmit() was using the field protocol from the IP header passed into argument. There is no functional change, this patch prepares the support of IPv4 over IPv4 for module sit. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 4 ++-- net/ipv4/ipip.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a83591492d..a982657d05e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -429,7 +429,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, return; } - ip_tunnel_xmit(skb, dev, tnl_params); + ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } static netdev_tx_t ipgre_xmit(struct sk_buff *skb, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e4147ec1665..b89095c1518 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -487,7 +487,7 @@ drop: EXPORT_SYMBOL_GPL(ip_tunnel_rcv); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const struct iphdr *tnl_params) + const struct iphdr *tnl_params, const u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; @@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = df; - iph->protocol = tnl_params->protocol; + iph->protocol = protocol; iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 77bfcce64fe..9df7ecd393f 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -222,7 +222,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->encapsulation = 1; } - ip_tunnel_xmit(skb, dev, tiph); + ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); return NETDEV_TX_OK; tx_error: -- cgit v1.2.3 From 32b8a8e59c9c8fa56051d6e9ab2924e469ac4d92 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 May 2013 23:48:16 +0000 Subject: sit: add IPv4 over IPv4 support This patch adds the support of IPv4 over Ipv4 for the module sit. 
The gain of this feature is to be able to have 4in4 and 6in4 over the same interface instead of having one interface for 6in4 and another for 4in4 even if encapsulation addresses are the same. To avoid conflicting with ipip module, sit IPv4 over IPv4 protocol is registered with a smaller priority. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/sit.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 05a5df2febc..06347dbd32c 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -63,7 +63,7 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 2, + .priority = 3, }; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 335363478bb..6b9c1f128ea 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -577,6 +577,10 @@ static int ipip6_rcv(struct sk_buff *skb) if (tunnel != NULL) { struct pcpu_tstats *tstats; + if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && + tunnel->parms.iph.protocol != 0) + goto out; + secpath_reset(skb); skb->mac_header = skb->network_header; skb_reset_network_header(skb); @@ -629,6 +633,35 @@ out: return 0; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. 
*/ + .proto = htons(ETH_P_IP), +}; + +static int ipip_rcv(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct ip_tunnel *tunnel; + + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, + iph->saddr, iph->daddr); + if (tunnel != NULL) { + if (tunnel->parms.iph.protocol != IPPROTO_IPIP && + tunnel->parms.iph.protocol != 0) + goto drop; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + } + + return 1; + +drop: + kfree_skb(skb); + return 0; +} + /* * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function * stores the embedded IPv4 address in v4dst and returns true. @@ -877,6 +910,43 @@ tx_error: return NETDEV_TX_OK; } +static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tiph = &tunnel->parms.iph; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); + return NETDEV_TX_OK; +} + +static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + ipip_tunnel_xmit(skb, dev); + break; + case htons(ETH_P_IPV6): + ipip6_tunnel_xmit(skb, dev); + break; + default: + goto tx_err; + } + + return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + +} + static void ipip6_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; @@ -1027,7 +1097,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || + if (p.iph.protocol != IPPROTO_IPV6 && + p.iph.protocol != IPPROTO_IPIP && + p.iph.protocol != 0) + goto done; + if (p.iph.version != 4 || p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) goto done; if (p.iph.ttl) @@ -1164,7 
+1238,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, - .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, @@ -1232,6 +1306,22 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + u8 proto; + + if (!data) + return 0; + + proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (proto != IPPROTO_IPV6 && + proto != IPPROTO_IPIP && + proto != 0) + return -EINVAL; + + return 0; +} + static void ipip6_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms) { @@ -1268,6 +1358,10 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_FLAGS]) parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + } #ifdef CONFIG_IPV6_SIT_6RD @@ -1391,6 +1485,8 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + #ifdef CONFIG_IPV6_SIT_6RD /* IFLA_IPTUN_6RD_PREFIX */ nla_total_size(sizeof(struct in6_addr)) + @@ -1416,6 +1512,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; @@ -1445,6 +1542,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_PROTO] 
= { .type = NLA_U8 }, #ifdef CONFIG_IPV6_SIT_6RD [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, @@ -1459,6 +1557,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipip6_tunnel_setup, + .validate = ipip6_validate, .newlink = ipip6_newlink, .changelink = ipip6_changelink, .get_size = ipip6_get_size, @@ -1471,6 +1570,12 @@ static struct xfrm_tunnel sit_handler __read_mostly = { .priority = 1, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = ipip_rcv, + .err_handler = ipip6_err, + .priority = 2, +}; + static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { int prio; @@ -1553,6 +1658,7 @@ static void __exit sit_cleanup(void) { rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ @@ -1569,9 +1675,14 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - pr_info("%s: can't add protocol\n", __func__); + pr_info("%s: can't register ip6ip4\n", __func__); goto xfrm_tunnel_failed; } + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: can't register ip4ip4\n", __func__); + goto xfrm_tunnel4_failed; + } err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1580,6 +1691,8 @@ out: return err; rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm_tunnel_failed: unregister_pernet_device(&sit_net_ops); -- cgit v1.2.3 From 387aa65a89434abe3128d36d1a6fc3842c94905d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 27 May 2013 20:46:31 +0000 Subject: ipv4: properly 
refresh rtable entries on pmtu/redirect events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 05ab86c5 (xfrm4: Invalidate all ipv4 routes on IPsec pmtu events). Flushing all cached entries is not needed. Instead, invalidate only the related next hop dsts to recheck for the added next hop exception where needed. This also fixes a subtle race due to bumping generation id's before updating the pmtu. Cc: Steffen Klassert Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 7 ++----- net/ipv4/esp4.c | 7 ++----- net/ipv4/ipcomp.c | 7 ++----- net/ipv4/route.c | 63 ++++++++++++++++++++++++++++++++----------------------- 4 files changed, 43 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2e7f1948216..717902669d2 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4cfe34d4cc9..ab3d814bc80 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 59cb8c76905..826be4cb482 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, 
u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 550781a17b3..561a37833d8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -594,11 +594,25 @@ static inline u32 fnhe_hashfun(__be32 daddr) return hval & (FNHE_HASH_SIZE - 1); } +static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) +{ + rt->rt_pmtu = fnhe->fnhe_pmtu; + rt->dst.expires = fnhe->fnhe_expires; + + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; + rt->rt_uses_gateway = 1; + } +} + static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, u32 pmtu, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; + struct rtable *rt; + unsigned int i; int depth; u32 hval = fnhe_hashfun(daddr); @@ -627,8 +641,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_gw = gw; if (pmtu) { fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = expires; + fnhe->fnhe_expires = max(1UL, expires); } + /* Update all cached dsts too */ + rt = rcu_dereference(fnhe->fnhe_rth); + if (rt) + fill_route_from_fnhe(rt, fnhe); } else { if (depth > FNHE_RECLAIM_DEPTH) fnhe = fnhe_oldest(hash); @@ -644,6 +662,18 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_expires = expires; + + /* Exception created; mark the cached routes for the nexthop + * stale, so anyone caching it rechecks if this exception + * applies to them. 
+ */ + for_each_possible_cpu(i) { + struct rtable __rcu **prt; + prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + rt = rcu_dereference(*prt); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + } } fnhe->fnhe_stamp = jiffies; @@ -917,13 +947,6 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); - } - rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); @@ -1063,11 +1086,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. * - * When a PMTU/redirect information update invalidates a - * route, this is indicated by setting obsolete to - * DST_OBSOLETE_KILL. + * When a PMTU/redirect information update invalidates a route, + * this is indicated by setting obsolete to DST_OBSOLETE_KILL or + * DST_OBSOLETE_DEAD by dst_free(). 
*/ - if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) + if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; return dst; } @@ -1215,20 +1238,8 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; } - if (fnhe->fnhe_pmtu) { - unsigned long expires = fnhe->fnhe_expires; - unsigned long diff = expires - jiffies; - - if (time_before(jiffies, expires)) { - rt->rt_pmtu = fnhe->fnhe_pmtu; - dst_set_expires(&rt->dst, diff); - } - } - if (fnhe->fnhe_gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = fnhe->fnhe_gw; - rt->rt_uses_gateway = 1; - } else if (!rt->rt_gateway) + fill_route_from_fnhe(rt, fnhe); + if (!rt->rt_gateway) rt->rt_gateway = daddr; rcu_assign_pointer(fnhe->fnhe_rth, rt); -- cgit v1.2.3 From f016229e303c294afac721de4cd4427e634950ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 27 May 2013 20:46:32 +0000 Subject: ipv4: rate limit updating of next hop exceptions with same pmtu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tunnel devices call update_pmtu for each packet sent, this causes contention on the fnhe_lock. Ignore the pmtu update if pmtu is not actually changed, and there is still plenty of time before the entry expires. Signed-off-by: Timo Teräs Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 561a37833d8..a4082be1b9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -947,6 +947,10 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (rt->rt_pmtu == mtu && + time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) + return; + rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); -- cgit v1.2.3 From 5aad1de5ea2c260b4cd2f70b70e146d55dbbc528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 27 May 2013 20:46:33 +0000 Subject: ipv4: use separate genid for next hop exceptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13d82bf5 (ipv4: Fix flushing of cached routing informations) added the support to flush learned pmtu information. However, using rt_genid is quite heavy as it is bumped on route add/change and multicast events amongst other places. These can happen quite often, especially if using dynamic routing protocols. While this is ok with routes (as they are just recreated locally), the pmtu information is learned from remote systems and the icmp notification can come with long delays. It is worthy to have separate genid to avoid excessive pmtu resets. Cc: Steffen Klassert Signed-off-by: Timo Teräs Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a4082be1b9b..403e2830286 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -658,6 +658,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } + fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; @@ -1236,8 +1237,11 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { + int genid = fnhe_genid(dev_net(rt->dst.dev)); struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); - if (orig && rt_is_expired(orig)) { + + if (fnhe->fnhe_genid != genid) { + fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; @@ -2443,8 +2447,11 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = (struct net *)__ctl->extra1; + if (write) { - rt_cache_flush((struct net *)__ctl->extra1); + rt_cache_flush(net); + fnhe_genid_bump(net); return 0; } @@ -2619,6 +2626,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->rt_genid, 0); + atomic_set(&net->fnhe_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; -- cgit v1.2.3 From 08578d8d4eb76b7afe314fa03abe167761462fe4 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 3 Jun 2013 00:23:25 +0000 Subject: icmp: fix icmp_unreach() comment. ICMP_PARAMETERPROB is handled by icmp_unreach(). This patch adds ICMP_PARAMETERPROB to the list of ICMP message types handled by icmp_unreach(). Signed-off-by: Rami Rosen Signed-off-by: David S. 
Miller --- net/ipv4/icmp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5d0d379b015..2864ca33bed 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -658,7 +658,8 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * ICMP_PARAMETERPROB. */ static void icmp_unreach(struct sk_buff *skb) -- cgit v1.2.3 From 9a99d4a50cb8ce516adf0f2436138d4c8e6e4535 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Jun 2013 15:00:52 +0000 Subject: icmp: avoid allocating large struct on stack struct icmp_bxm is a large struct, reduce stack usage by allocating it on heap. Cc: Eric Dumazet Cc: Joe Perches Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2864ca33bed..5f7d11a4587 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -482,7 +482,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; - struct icmp_bxm icmp_param; + struct icmp_bxm *icmp_param; struct rtable *rt = skb_rtable(skb_in); struct ipcm_cookie ipc; struct flowi4 fl4; @@ -558,9 +558,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } + icmp_param = kmalloc(sizeof(*icmp_param), GFP_ATOMIC); + if (!icmp_param) + return; + sk = icmp_xmit_lock(net); if (sk == NULL) - return; + goto out_free; /* * Construct source address and options. 
@@ -586,7 +590,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) + if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -594,19 +598,19 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) * Prepare data for ICMP header. */ - icmp_param.data.icmph.type = type; - icmp_param.data.icmph.code = code; - icmp_param.data.icmph.un.gateway = info; - icmp_param.data.icmph.checksum = 0; - icmp_param.skb = skb_in; - icmp_param.offset = skb_network_offset(skb_in); + icmp_param->data.icmph.type = type; + icmp_param->data.icmph.code = code; + icmp_param->data.icmph.un.gateway = info; + icmp_param->data.icmph.checksum = 0; + icmp_param->skb = skb_in; + icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; - ipc.opt = &icmp_param.replyopts.opt; + ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, - type, code, &icmp_param); + type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -618,19 +622,21 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) room = dst_mtu(&rt->dst); if (room > 576) room = 576; - room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; + room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); - icmp_param.data_len = skb_in->len - icmp_param.offset; - if (icmp_param.data_len > room) - icmp_param.data_len = room; - icmp_param.head_len = sizeof(struct icmphdr); + icmp_param->data_len = skb_in->len - icmp_param->offset; + if (icmp_param->data_len > room) + icmp_param->data_len = room; + icmp_param->head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); + icmp_push_reply(icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); +out_free: + 
kfree(icmp_param); out:; } EXPORT_SYMBOL(icmp_send); -- cgit v1.2.3 From ed405be5cb5e8e494e8dcd83b6eb1add34dc752a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Jun 2013 13:51:59 +0200 Subject: mac80211: fix sdata locking around __ieee80211_request_smps My cfg80211/mac80211 locking unification broke the sdata locking in ieee80211_set_power_mgmt, it needs to acquire the lock for __ieee80211_request_smps(). Add the locking. Reported-by: Jakub Kicinski Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9034da16cf1..30622101d3b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2378,7 +2378,9 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ + sdata_lock(sdata); __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); + sdata_unlock(sdata); if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); -- cgit v1.2.3 From 866403a7bdd3941cbb4e2085d8ac368dcabe800c Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Fri, 31 May 2013 17:41:47 -0700 Subject: mac80211: don't check local mesh TTL on TX nl80211 has already verified the mesh TTL on setting the mesh config, so no need to check it again in mac80211. 
Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 34be9336b5d..4105d0ca963 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1790,12 +1790,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - if (!sdata->u.mesh.mshcfg.dot11MeshTTL) { - /* Do not send frames with mesh_ttl == 0 */ - sdata->u.mesh.mshstats.dropped_frames_ttl++; - goto fail_rcu; - } - if (!is_multicast_ether_addr(skb->data)) { struct sta_info *next_hop; bool mpp_lookup = true; -- cgit v1.2.3 From e05ecccdf752122a439b03c3190458d2c8f0bac6 Mon Sep 17 00:00:00 2001 From: Jacob Minshall Date: Wed, 29 May 2013 14:32:36 -0700 Subject: mac80211: set mesh formation field properly Cap max peerings at 63 in accordance with IEEE-2012 8.4.2.100.7. Triggers a beacon regeneration every time the number of peerings changes. Previously this would only happen if the "accepting peerings" bit changed. Signed-off-by: Jacob Minshall Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 3 +-- net/mac80211/mesh.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index b3d1fdd4636..73a597bad6e 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -274,8 +274,7 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, *pos++ = ifmsh->mesh_auth_id; /* Mesh Formation Info - number of neighbors */ neighbors = atomic_read(&ifmsh->estab_plinks); - /* Number of neighbor mesh STAs or 15 whichever is smaller */ - neighbors = (neighbors > 15) ? 
15 : neighbors; + neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS); *pos++ = neighbors << 1; /* Mesh capability */ *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index da158774eeb..8b4d9a3e9ee 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -324,14 +324,14 @@ static inline u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON; } static inline u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON; } static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) -- cgit v1.2.3 From 964dc9e2c3aaccacacd40640964a58544fb5769a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Jun 2013 17:25:34 +0200 Subject: cfg80211: take WoWLAN support information out of wiphy struct There's no need to take up the space for devices that don't support WoWLAN, and most drivers can even make the support data static const (except where it's modified at runtime.) 
Signed-off-by: Johannes Berg --- net/mac80211/main.c | 3 +-- net/wireless/core.c | 20 ++++++++++-------- net/wireless/nl80211.c | 56 ++++++++++++++++++++++++-------------------------- 3 files changed, 40 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1998f147526..626c83c042d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -686,8 +686,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; #ifdef CONFIG_PM - if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns) && - (!local->ops->suspend || !local->ops->resume)) + if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume)) return -EINVAL; #endif diff --git a/net/wireless/core.c b/net/wireless/core.c index 41cec1776f4..f553b9484c1 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -449,8 +449,13 @@ int wiphy_register(struct wiphy *wiphy) u16 ifmodes = wiphy->interface_modes; #ifdef CONFIG_PM - if (WARN_ON((wiphy->wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && - !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) + if (WARN_ON(wiphy->wowlan && + (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + !(wiphy->wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) + return -EINVAL; + if (WARN_ON(wiphy->wowlan && + !wiphy->wowlan->flags && !wiphy->wowlan->n_patterns && + !wiphy->wowlan->tcp)) return -EINVAL; #endif @@ -540,12 +545,11 @@ int wiphy_register(struct wiphy *wiphy) } #ifdef CONFIG_PM - if (rdev->wiphy.wowlan.n_patterns) { - if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len || - rdev->wiphy.wowlan.pattern_min_len > - rdev->wiphy.wowlan.pattern_max_len)) - return -EINVAL; - } + if (WARN_ON(rdev->wiphy.wowlan && rdev->wiphy.wowlan->n_patterns && + (!rdev->wiphy.wowlan->pattern_min_len || + rdev->wiphy.wowlan->pattern_min_len > + rdev->wiphy.wowlan->pattern_max_len))) + return -EINVAL; #endif /* check and set up bitrates */ diff --git a/net/wireless/nl80211.c 
b/net/wireless/nl80211.c index 31d265f36d2..7ee9af3283a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -908,7 +908,7 @@ nla_put_failure: static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { - const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan->tcp; struct nlattr *nl_tcp; if (!tcp) @@ -951,37 +951,37 @@ static int nl80211_send_wowlan(struct sk_buff *msg, { struct nlattr *nl_wowlan; - if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns) + if (!dev->wiphy.wowlan) return 0; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; - if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && + if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && nla_put_flag(msg, 
NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) return -ENOBUFS; - if (dev->wiphy.wowlan.n_patterns) { + if (dev->wiphy.wowlan->n_patterns) { struct nl80211_wowlan_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan.n_patterns, - .min_pattern_len = dev->wiphy.wowlan.pattern_min_len, - .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, - .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset, + .max_patterns = dev->wiphy.wowlan->n_patterns, + .min_pattern_len = dev->wiphy.wowlan->pattern_min_len, + .max_pattern_len = dev->wiphy.wowlan->pattern_max_len, + .max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, @@ -7580,8 +7580,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) void *hdr; u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && - !rdev->wiphy.wowlan.tcp) + if (!rdev->wiphy.wowlan) return -EOPNOTSUPP; if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) { @@ -7654,7 +7653,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, u32 data_size, wake_size, tokens_size = 0, wake_mask_size; int err, port; - if (!rdev->wiphy.wowlan.tcp) + if (!rdev->wiphy.wowlan->tcp) return -EINVAL; err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, @@ -7674,16 +7673,16 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, return -EINVAL; data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); - if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + if (data_size > rdev->wiphy.wowlan->tcp->data_payload_max) return -EINVAL; if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > - rdev->wiphy.wowlan.tcp->data_interval_max || + rdev->wiphy.wowlan->tcp->data_interval_max || 
nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0) return -EINVAL; wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); - if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + if (wake_size > rdev->wiphy.wowlan->tcp->wake_payload_max) return -EINVAL; wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); @@ -7698,13 +7697,13 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (!tok->len || tokens_size % tok->len) return -EINVAL; - if (!rdev->wiphy.wowlan.tcp->tok) + if (!rdev->wiphy.wowlan->tcp->tok) return -EINVAL; - if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + if (tok->len > rdev->wiphy.wowlan->tcp->tok->max_len) return -EINVAL; - if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + if (tok->len < rdev->wiphy.wowlan->tcp->tok->min_len) return -EINVAL; - if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + if (tokens_size > rdev->wiphy.wowlan->tcp->tok->bufsize) return -EINVAL; if (tok->offset + tok->len > data_size) return -EINVAL; @@ -7712,7 +7711,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); - if (!rdev->wiphy.wowlan.tcp->seq) + if (!rdev->wiphy.wowlan->tcp->seq) return -EINVAL; if (seq->len == 0 || seq->len > 4) return -EINVAL; @@ -7793,12 +7792,11 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG]; struct cfg80211_wowlan new_triggers = {}; struct cfg80211_wowlan *ntrig; - struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan; + const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan; int err, i; bool prev_enabled = rdev->wiphy.wowlan_config; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && - !rdev->wiphy.wowlan.tcp) + if (!wowlan) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { -- cgit v1.2.3 From 
d6d23de2786edca61fb9813ff7cdc7d2543d08a7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 4 Jun 2013 12:15:42 +0200 Subject: mac80211: add a tx control flag to indicate PS-Poll/uAPSD response Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a04c5671d7f..b4297982d34 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1132,6 +1132,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_PS_RESPONSE | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; @@ -1269,7 +1270,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * STA may still remain is PS mode after this frame * exchange. */ - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are -- cgit v1.2.3 From 9c90a9f64c21b0a3983655c9c08cf98489057a43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Jun 2013 12:46:03 +0200 Subject: nl80211: remove bogus genlmsg_end() error checking genlmsg_end() can't return an error since it returns the skb length so remove checks treating the return value as an error code. 
Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ee9af3283a..ce949e38178 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9848,7 +9848,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); if (!nlportid) @@ -9869,12 +9868,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr)) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return true; - } - + genlmsg_end(msg, hdr); genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return true; @@ -10317,10 +10311,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, if (nl80211_send_chandef(msg, chandef)) goto nla_put_failure; - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10386,7 +10377,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_probe_status(dev, addr, cookie, acked); @@ -10408,11 +10398,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, (acked && nla_put_flag(msg, NL80211_ATTR_ACK))) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10478,7 +10464,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct 
cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err, size = 200; + int size = 200; trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); @@ -10564,9 +10550,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, nla_nest_end(msg, reasons); } - err = genlmsg_end(msg, hdr); - if (err < 0) - goto free_msg; + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10586,7 +10570,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_tdls_oper_request(wdev->wiphy, dev, peer, oper, reason_code); @@ -10609,11 +10592,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason_code))) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10671,7 +10650,6 @@ void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_ft_event(wiphy, netdev, ft_event); @@ -10697,11 +10675,7 @@ void cfg80211_ft_event(struct net_device *netdev, nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, ft_event->ric_ies); - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); -- cgit v1.2.3 From ff40b425f04144771920b79672d6691910c7def7 Mon Sep 17 00:00:00 2001 From: Pontus Fuchs Date: Tue, 4 Jun 2013 12:44:52 +0200 Subject: mac80211: set IEEE80211_TX_CTL_REQ_TX_STATUS on nullframes The connection monitor needs to know the tx status of nullframes 
to work properly. Signed-off-by: Pontus Fuchs Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f44f4caa69e..9950e13f641 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -880,6 +880,10 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_INTFL_OFFCHAN_TX_OK; + + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; -- cgit v1.2.3 From 6ff57cf88807dd81300b5b9c623dc5eb6422b9f6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 May 2013 00:55:00 +0200 Subject: cfg80211/mac80211: clean up cfg80211 SME APIs Do some cleanups in the cfg80211 SME APIs, which are only used by mac80211. Most of these functions get a frame passed, and there isn't really any reason to export multiple functions as cfg80211 can check the frame type instead, do that. Additionally, the API functions have confusing names like cfg80211_send_...() which was meant to indicate that it sends an event to userspace, but gets a bit confusing when there's both TX and RX and they're not all clearly labeled. 
Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 44 ++++++++++++++------------ net/mac80211/rx.c | 26 ++++++--------- net/wireless/mlme.c | 86 +++++++++++++++++++++++++++++++------------------- net/wireless/nl80211.c | 30 ++++++++---------- net/wireless/trace.h | 46 ++++++++++++++++++++------- 5 files changed, 134 insertions(+), 98 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9950e13f641..df8170a80a5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2155,7 +2155,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); sdata_unlock(sdata); } @@ -2302,7 +2303,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; } @@ -2337,7 +2338,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, * Report auth frame to user space for processing since another * round of Authentication frames is still needed. 
*/ - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; } @@ -2354,7 +2355,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&sdata->local->sta_mtx); - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; out_err: mutex_unlock(&sdata->local->sta_mtx); @@ -2387,7 +2388,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); } @@ -2413,7 +2414,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, len); + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); } static void ieee80211_get_rates(struct ieee80211_supported_band *sband, @@ -2711,7 +2712,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); cfg80211_put_bss(sdata->local->hw.wiphy, bss); - cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); + cfg80211_assoc_timeout(sdata->dev, mgmt->bssid); return; } sdata_info(sdata, "associated\n"); @@ -2724,7 +2725,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_destroy_assoc_data(sdata, true); } - cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, len); + cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -3117,8 +3118,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); - cfg80211_send_deauth(sdata->dev, deauth_buf, - 
sizeof(deauth_buf)); + cfg80211_tx_mlme_mgmt(sdata->dev, deauth_buf, + sizeof(deauth_buf)); return; } @@ -3236,7 +3237,8 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, tx, frame_buf); - cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); } static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) @@ -3427,7 +3429,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_auth_data(sdata, false); - cfg80211_send_auth_timeout(sdata->dev, bssid); + cfg80211_auth_timeout(sdata->dev, bssid); } } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) run_again(sdata, ifmgd->auth_data->timeout); @@ -3443,7 +3445,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_assoc_data(sdata, false); - cfg80211_send_assoc_timeout(sdata->dev, bssid); + cfg80211_assoc_timeout(sdata->dev, bssid); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(sdata, ifmgd->assoc_data->timeout); @@ -3992,8 +3994,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + sizeof(frame_buf)); } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4055,8 +4057,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + sizeof(frame_buf)); } if (ifmgd->auth_data && !ifmgd->auth_data->done) { @@ -4309,8 +4311,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, out: if (report_frame) - cfg80211_send_deauth(sdata->dev, frame_buf, - 
IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4340,8 +4342,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, req->reason_code, !req->local_state_change, frame_buf); - cfg80211_send_disassoc(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index bdd7b4a719e..23dbcfc69b3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1747,27 +1747,21 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_has_protected(fc) && ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && rx->key)) { - if (ieee80211_is_deauth(fc)) - cfg80211_send_unprot_deauth(rx->sdata->dev, - rx->skb->data, - rx->skb->len); - else if (ieee80211_is_disassoc(fc)) - cfg80211_send_unprot_disassoc(rx->sdata->dev, - rx->skb->data, - rx->skb->len); + if (ieee80211_is_deauth(fc) || + ieee80211_is_disassoc(fc)) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; } /* BIP does not use Protected field, so need to check MMIE */ if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && ieee80211_get_mmie_keyidx(rx->skb) < 0)) { - if (ieee80211_is_deauth(fc)) - cfg80211_send_unprot_deauth(rx->sdata->dev, - rx->skb->data, - rx->skb->len); - else if (ieee80211_is_disassoc(fc)) - cfg80211_send_unprot_disassoc(rx->sdata->dev, - rx->skb->data, - rx->skb->len); + if (ieee80211_is_deauth(fc) || + ieee80211_is_disassoc(fc)) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; } /* diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 7bde5d9c000..4b9c2be0d56 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -18,20 +18,7 @@ #include "rdev-ops.h" -void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) -{ - 
struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_send_rx_auth(dev); - - nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); - cfg80211_sme_rx_auth(dev, buf, len); -} -EXPORT_SYMBOL(cfg80211_send_rx_auth); - -void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, +void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, const u8 *buf, size_t len) { u16 status_code; @@ -84,10 +71,10 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, status_code, status_code == WLAN_STATUS_SUCCESS, bss); } -EXPORT_SYMBOL(cfg80211_send_rx_assoc); +EXPORT_SYMBOL(cfg80211_rx_assoc_resp); -void cfg80211_send_deauth(struct net_device *dev, - const u8 *buf, size_t len) +static void cfg80211_process_deauth(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -96,9 +83,6 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *bssid = mgmt->bssid; bool was_current = false; - trace_cfg80211_send_deauth(dev); - ASSERT_WDEV_LOCK(wdev); - if (wdev->current_bss && ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { cfg80211_unhold_bss(wdev->current_bss); @@ -123,10 +107,9 @@ void cfg80211_send_deauth(struct net_device *dev, false, NULL); } } -EXPORT_SYMBOL(cfg80211_send_deauth); -void cfg80211_send_disassoc(struct net_device *dev, - const u8 *buf, size_t len) +static void cfg80211_process_disassoc(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -136,9 +119,6 @@ void cfg80211_send_disassoc(struct net_device *dev, u16 reason_code; bool from_ap; - trace_cfg80211_send_disassoc(dev); - ASSERT_WDEV_LOCK(wdev); - nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); if (wdev->sme_state != CFG80211_SME_CONNECTED) @@ 
-153,15 +133,38 @@ void cfg80211_send_disassoc(struct net_device *dev, } else WARN_ON(1); - reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); } -EXPORT_SYMBOL(cfg80211_send_disassoc); -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct ieee80211_mgmt *mgmt = (void *)buf; + + ASSERT_WDEV_LOCK(wdev); + + trace_cfg80211_rx_mlme_mgmt(dev, buf, len); + + if (WARN_ON(len < 2)) + return; + + if (ieee80211_is_auth(mgmt->frame_control)) { + nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); + cfg80211_sme_rx_auth(dev, buf, len); + } else if (ieee80211_is_deauth(mgmt->frame_control)) { + cfg80211_process_deauth(dev, buf, len); + } else if (ieee80211_is_disassoc(mgmt->frame_control)) { + cfg80211_process_disassoc(dev, buf, len); + } +} +EXPORT_SYMBOL(cfg80211_rx_mlme_mgmt); + +void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -175,9 +178,9 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); } -EXPORT_SYMBOL(cfg80211_send_auth_timeout); +EXPORT_SYMBOL(cfg80211_auth_timeout); -void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_assoc_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -191,7 +194,26 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); } -EXPORT_SYMBOL(cfg80211_send_assoc_timeout); +EXPORT_SYMBOL(cfg80211_assoc_timeout); 
+ +void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct ieee80211_mgmt *mgmt = (void *)buf; + + ASSERT_WDEV_LOCK(wdev); + + trace_cfg80211_tx_mlme_mgmt(dev, buf, len); + + if (WARN_ON(len < 2)) + return; + + if (ieee80211_is_deauth(mgmt->frame_control)) + cfg80211_process_deauth(dev, buf, len); + else + cfg80211_process_disassoc(dev, buf, len); +} +EXPORT_SYMBOL(cfg80211_tx_mlme_mgmt); void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, enum nl80211_key_type key_type, int key_id, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ce949e38178..444f5effb77 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9313,31 +9313,27 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE, gfp); } -void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, - size_t len) +void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, + size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + const struct ieee80211_mgmt *mgmt = (void *)buf; + u32 cmd; - trace_cfg80211_send_unprot_deauth(dev); - nl80211_send_mlme_event(rdev, dev, buf, len, - NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_deauth); + if (WARN_ON(len < 2)) + return; -void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, - size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + if (ieee80211_is_deauth(mgmt->frame_control)) + cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE; + else + cmd = NL80211_CMD_UNPROT_DISASSOCIATE; - trace_cfg80211_send_unprot_disassoc(dev); - nl80211_send_mlme_event(rdev, dev, buf, len, - NL80211_CMD_UNPROT_DISASSOCIATE, 
GFP_ATOMIC); + trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len); + nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC); } -EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); +EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt); static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 23fafeae8a1..e1534baf2eb 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1911,24 +1911,46 @@ TRACE_EVENT(cfg80211_send_rx_assoc, NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_deauth, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DECLARE_EVENT_CLASS(netdev_frame_event, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len), + TP_STRUCT__entry( + NETDEV_ENTRY + __dynamic_array(u8, frame, len) + ), + TP_fast_assign( + NETDEV_ASSIGN; + memcpy(__get_dynamic_array(frame), buf, len); + ), + TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", + NETDEV_PR_ARG, + le16_to_cpup((__le16 *)__get_dynamic_array(frame))) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_disassoc, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DEFINE_EVENT(netdev_frame_event, cfg80211_rx_unprot_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_unprot_deauth, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DEFINE_EVENT(netdev_frame_event, cfg80211_rx_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_unprot_disassoc, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +TRACE_EVENT(cfg80211_tx_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len), + TP_STRUCT__entry( + NETDEV_ENTRY + __dynamic_array(u8, frame, len) + ), + TP_fast_assign( 
+ NETDEV_ASSIGN; + memcpy(__get_dynamic_array(frame), buf, len); + ), + TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", + NETDEV_PR_ARG, + le16_to_cpup((__le16 *)__get_dynamic_array(frame))) ); DECLARE_EVENT_CLASS(netdev_mac_evt, -- cgit v1.2.3 From ceca7b7121795ef81bd598a240d53a925662d0c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 May 2013 00:55:45 +0200 Subject: cfg80211: separate internal SME implementation The current internal SME implementation in cfg80211 is very mixed up with the MLME handling, which has been causing issues for a long time. There are three things that the implementation has to provide: * a basic SME implementation for nl80211's connect() call (for drivers implementing auth/assoc, which is really just mac80211) and wireless extensions * MLME events for the userspace SME * SME events (connected, disconnected etc.) for all different SME implementation possibilities (driver, cfg80211 and userspace) To achieve these goals it isn't necessary to track the software SME's connection status outside of its state (which is the part that caused many issues.) Instead, track it only in the SME data (wdev->conn) and in the general case only track whether the wdev is connected or not (via wdev->current_bss.) Also separate the internal implementation to not have callbacks from the SME events, but rather call it from the API functions that the driver (or rather mac80211) calls. This separates the code better.
Signed-off-by: Johannes Berg --- net/wireless/core.c | 1 - net/wireless/core.h | 30 +-- net/wireless/ibss.c | 6 - net/wireless/mlme.c | 191 ++++++----------- net/wireless/nl80211.c | 5 +- net/wireless/sme.c | 542 +++++++++++++++++++++++------------------------- net/wireless/wext-sme.c | 8 +- 7 files changed, 335 insertions(+), 448 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index f553b9484c1..221e76b53a9 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -821,7 +821,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, pr_err("failed to add phy80211 symlink to netdev!\n"); } wdev->netdev = dev; - wdev->sme_state = CFG80211_SME_IDLE; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; diff --git a/net/wireless/core.h b/net/wireless/core.h index a65eaf8a84c..a6b45bf00f3 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -308,11 +308,6 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, bool local_state_change); void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); -void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - u16 status, bool wextev, - struct cfg80211_bss *bss); int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, int match_len); @@ -328,12 +323,19 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); -/* SME */ +/* SME events */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, struct cfg80211_cached_keys *connkeys, const u8 *prev_bssid); +void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 
*req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, bool wextev, + struct cfg80211_bss *bss); +void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, + size_t ie_len, u16 reason, bool from_ap); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); @@ -344,21 +346,21 @@ void __cfg80211_roamed(struct wireless_dev *wdev, int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +/* SME implementation */ void cfg80211_conn_work(struct work_struct *work); -void cfg80211_sme_failed_assoc(struct wireless_dev *wdev); -bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev); +void cfg80211_sme_scan_done(struct net_device *dev); +bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status); +void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len); +void cfg80211_sme_disassoc(struct wireless_dev *wdev); +void cfg80211_sme_deauth(struct wireless_dev *wdev); +void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); +void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); -void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, - size_t ie_len, u16 reason, bool from_ap); -void cfg80211_sme_scan_done(struct net_device *dev); -void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); -void cfg80211_sme_disassoc(struct net_device *dev, - struct cfg80211_internal_bss *bss); void __cfg80211_scan_done(struct work_struct *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); void __cfg80211_sched_scan_results(struct work_struct *wk); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 
5449c5a6de8..39bff7d3676 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -43,7 +43,6 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - wdev->sme_state = CFG80211_SME_CONNECTED; cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, @@ -64,8 +63,6 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) trace_cfg80211_ibss_joined(dev, bssid); - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING); - ev = kzalloc(sizeof(*ev), gfp); if (!ev) return; @@ -120,7 +117,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.chandef = params->chandef; #endif - wdev->sme_state = CFG80211_SME_CONNECTING; err = cfg80211_can_use_chan(rdev, wdev, params->chandef.chan, params->channel_fixed @@ -134,7 +130,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, err = rdev_join_ibss(rdev, dev, params); if (err) { wdev->connect_keys = NULL; - wdev->sme_state = CFG80211_SME_IDLE; return err; } @@ -186,7 +181,6 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) } wdev->current_bss = NULL; - wdev->sme_state = CFG80211_SME_IDLE; wdev->ssid_len = 0; #ifdef CONFIG_CFG80211_WEXT if (!nowext) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 4b9c2be0d56..a61a44bc6cf 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -21,129 +21,85 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, const u8 *buf, size_t len) { - u16 status_code; struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u8 *ie = mgmt->u.assoc_resp.variable; int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); + u16 status_code 
= le16_to_cpu(mgmt->u.assoc_resp.status_code); trace_cfg80211_send_rx_assoc(dev, bss); - status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); - /* * This is a bit of a hack, we don't notify userspace of * a (re-)association reply if we tried to send a reassoc * and got a reject -- we only try again with an assoc * frame instead of reassoc. */ - if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && - cfg80211_sme_failed_reassoc(wdev)) { + if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) { cfg80211_put_bss(wiphy, bss); return; } nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); - - if (status_code != WLAN_STATUS_SUCCESS && wdev->conn) { - cfg80211_sme_failed_assoc(wdev); - /* - * do not call connect_result() now because the - * sme will schedule work that does it later. - */ - cfg80211_put_bss(wiphy, bss); - return; - } - - if (!wdev->conn && wdev->sme_state == CFG80211_SME_IDLE) { - /* - * This is for the userspace SME, the CONNECTING - * state will be changed to CONNECTED by - * __cfg80211_connect_result() below. 
- */ - wdev->sme_state = CFG80211_SME_CONNECTING; - } - - /* this consumes the bss reference */ + /* update current_bss etc., consumes the bss reference */ __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, status_code, status_code == WLAN_STATUS_SUCCESS, bss); } EXPORT_SYMBOL(cfg80211_rx_assoc_resp); -static void cfg80211_process_deauth(struct net_device *dev, +static void cfg80211_process_auth(struct wireless_dev *wdev, + const u8 *buf, size_t len) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL); + cfg80211_sme_rx_auth(wdev, buf, len); +} + +static void cfg80211_process_deauth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; - bool was_current = false; - - if (wdev->current_bss && - ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - was_current = true; - } + u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); + bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr); - nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); + nl80211_send_deauth(rdev, wdev->netdev, buf, len, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) { - u16 reason_code; - bool from_ap; - - reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); + if (!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) + return; - from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); - __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); - } else if (wdev->sme_state == 
CFG80211_SME_CONNECTING) { - __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); - } + __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); + cfg80211_sme_deauth(wdev); } -static void cfg80211_process_disassoc(struct net_device *dev, +static void cfg80211_process_disassoc(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; - u16 reason_code; - bool from_ap; + u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); + bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr); - nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); + nl80211_send_disassoc(rdev, wdev->netdev, buf, len, GFP_KERNEL); - if (wdev->sme_state != CFG80211_SME_CONNECTED) + if (WARN_ON(!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) return; - if (wdev->current_bss && - ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { - cfg80211_sme_disassoc(dev, wdev->current_bss); - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } else - WARN_ON(1); - - reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - - from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); - __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); + __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); + cfg80211_sme_disassoc(wdev); } void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (void 
*)buf; ASSERT_WDEV_LOCK(wdev); @@ -153,14 +109,12 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) if (WARN_ON(len < 2)) return; - if (ieee80211_is_auth(mgmt->frame_control)) { - nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); - cfg80211_sme_rx_auth(dev, buf, len); - } else if (ieee80211_is_deauth(mgmt->frame_control)) { - cfg80211_process_deauth(dev, buf, len); - } else if (ieee80211_is_disassoc(mgmt->frame_control)) { - cfg80211_process_disassoc(dev, buf, len); - } + if (ieee80211_is_auth(mgmt->frame_control)) + cfg80211_process_auth(wdev, buf, len); + else if (ieee80211_is_deauth(mgmt->frame_control)) + cfg80211_process_deauth(wdev, buf, len); + else if (ieee80211_is_disassoc(mgmt->frame_control)) + cfg80211_process_disassoc(wdev, buf, len); } EXPORT_SYMBOL(cfg80211_rx_mlme_mgmt); @@ -173,10 +127,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) trace_cfg80211_send_auth_timeout(dev, addr); nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); + cfg80211_sme_auth_timeout(wdev); } EXPORT_SYMBOL(cfg80211_auth_timeout); @@ -189,10 +140,7 @@ void cfg80211_assoc_timeout(struct net_device *dev, const u8 *addr) trace_cfg80211_send_assoc_timeout(dev, addr); nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); + cfg80211_sme_assoc_timeout(wdev); } EXPORT_SYMBOL(cfg80211_assoc_timeout); @@ -209,9 +157,9 @@ void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) return; if (ieee80211_is_deauth(mgmt->frame_control)) - cfg80211_process_deauth(dev, buf, len); + cfg80211_process_deauth(wdev, buf, len); else - cfg80211_process_disassoc(dev, buf, len); + 
cfg80211_process_disassoc(wdev, buf, len); } EXPORT_SYMBOL(cfg80211_tx_mlme_mgmt); @@ -336,21 +284,12 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - bool was_connected = false; ASSERT_WDEV_LOCK(wdev); - if (wdev->current_bss && req->prev_bssid && - ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) { - /* - * Trying to reassociate: Allow this to proceed and let the old - * association to be dropped when the new one is completed. - */ - if (wdev->sme_state == CFG80211_SME_CONNECTED) { - was_connected = true; - wdev->sme_state = CFG80211_SME_CONNECTING; - } - } else if (wdev->current_bss) + if (wdev->current_bss && + (!req->prev_bssid || !ether_addr_equal(wdev->current_bss->pub.bssid, + req->prev_bssid))) return -EALREADY; cfg80211_oper_and_ht_capa(&req->ht_capa_mask, @@ -360,11 +299,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (!req->bss) { - if (was_connected) - wdev->sme_state = CFG80211_SME_CONNECTED; + if (!req->bss) return -ENOENT; - } err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); if (err) @@ -373,11 +309,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, err = rdev_assoc(rdev, dev, req); out: - if (err) { - if (was_connected) - wdev->sme_state = CFG80211_SME_CONNECTED; + if (err) cfg80211_put_bss(&rdev->wiphy, req->bss); - } return err; } @@ -398,8 +331,9 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (local_state_change && (!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + if (local_state_change && + (!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) return 0; return rdev_deauth(rdev, dev, &req); @@ -417,13 +351,11 @@ int cfg80211_mlme_disassoc(struct 
cfg80211_registered_device *rdev, .ie = ie, .ie_len = ie_len, }; + int err; ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_CONNECTED) - return -ENOTCONN; - - if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) + if (!wdev->current_bss) return -ENOTCONN; if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) @@ -431,7 +363,13 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, else return -ENOTCONN; - return rdev_disassoc(rdev, dev, &req); + err = rdev_disassoc(rdev, dev, &req); + if (err) + return err; + + /* driver should have reported the disassoc */ + WARN_ON(wdev->current_bss); + return 0; } void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, @@ -439,10 +377,6 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, { struct wireless_dev *wdev = dev->ieee80211_ptr; u8 bssid[ETH_ALEN]; - struct cfg80211_deauth_request req = { - .reason_code = WLAN_REASON_DEAUTH_LEAVING, - .bssid = bssid, - }; ASSERT_WDEV_LOCK(wdev); @@ -453,13 +387,8 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, return; memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); - rdev_deauth(rdev, dev, &req); - - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&rdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } + cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); } struct cfg80211_mgmt_registration { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 444f5effb77..88e820b7367 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -800,12 +800,9 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: - if (!wdev->current_bss) - return -ENOLINK; - break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (wdev->sme_state != CFG80211_SME_CONNECTED) + if (!wdev->current_bss) return -ENOLINK; break; 
default: diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 81be95f3be7..ae7e2cbf45c 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1,5 +1,7 @@ /* - * SME code for cfg80211's connect emulation. + * SME code for cfg80211 + * both driver SME event handling and the SME implementation + * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg * Copyright (C) 2009 Intel Corporation. All rights reserved. @@ -18,18 +20,24 @@ #include "reg.h" #include "rdev-ops.h" +/* + * Software SME in cfg80211, using auth/assoc/deauth calls to the + * driver. This is is for implementing nl80211's connect/disconnect + * and wireless extensions (if configured.) + */ + struct cfg80211_conn { struct cfg80211_connect_params params; /* these are sub-states of the _CONNECTING sme_state */ enum { - CFG80211_CONN_IDLE, CFG80211_CONN_SCANNING, CFG80211_CONN_SCAN_AGAIN, CFG80211_CONN_AUTHENTICATE_NEXT, CFG80211_CONN_AUTHENTICATING, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, - CFG80211_CONN_DEAUTH_ASSOC_FAIL, + CFG80211_CONN_DEAUTH, + CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 *ie; @@ -37,39 +45,16 @@ struct cfg80211_conn { bool auto_auth, prev_bssid_valid; }; -static bool cfg80211_is_all_idle(void) +static void cfg80211_sme_free(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev; - struct wireless_dev *wdev; - bool is_all_idle = true; - - /* - * All devices must be idle as otherwise if you are actively - * scanning some new beacon hints could be learned and would - * count as new regulatory hints. 
- */ - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry(wdev, &rdev->wdev_list, list) { - wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) - is_all_idle = false; - wdev_unlock(wdev); - } - } - - return is_all_idle; -} + if (!wdev->conn) + return; -static void disconnect_work(struct work_struct *work) -{ - rtnl_lock(); - if (cfg80211_is_all_idle()) - regulatory_hint_disconnect(); - rtnl_unlock(); + kfree(wdev->conn->ie); + kfree(wdev->conn); + wdev->conn = NULL; } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); - static int cfg80211_conn_scan(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -164,6 +149,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) params = &wdev->conn->params; switch (wdev->conn->state) { + case CFG80211_CONN_SCANNING: + /* didn't find it during scan ... */ + return -ENOENT; case CFG80211_CONN_SCAN_AGAIN: return cfg80211_conn_scan(wdev); case CFG80211_CONN_AUTHENTICATE_NEXT: @@ -200,12 +188,11 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) WLAN_REASON_DEAUTH_LEAVING, false); return err; - case CFG80211_CONN_DEAUTH_ASSOC_FAIL: + case CFG80211_CONN_DEAUTH: cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); - /* return an error so that we call __cfg80211_connect_result() */ - return -EINVAL; + return 0; default: return 0; } @@ -229,7 +216,8 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); continue; } - if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) { + if (!wdev->conn || + wdev->conn->state == CFG80211_CONN_CONNECTED) { wdev_unlock(wdev); continue; } @@ -237,12 +225,14 @@ void cfg80211_conn_work(struct work_struct *work) memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); bssid = bssid_buf; } - if (cfg80211_conn_do_work(wdev)) + if (cfg80211_conn_do_work(wdev)) { __cfg80211_connect_result( wdev->netdev, bssid, NULL, 0, 
NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); + cfg80211_sme_free(wdev); + } wdev_unlock(wdev); } @@ -286,9 +276,6 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_CONNECTING) - return; - if (!wdev->conn) return; @@ -297,20 +284,10 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) return; bss = cfg80211_get_conn_bss(wdev); - if (bss) { + if (bss) cfg80211_put_bss(&rdev->wiphy, bss); - } else { - /* not found */ - if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) - schedule_work(&rdev->conn_work); - else - __cfg80211_connect_result( - wdev->netdev, - wdev->conn->params.bssid, - NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); - } + else + schedule_work(&rdev->conn_work); } void cfg80211_sme_scan_done(struct net_device *dev) @@ -322,10 +299,8 @@ void cfg80211_sme_scan_done(struct net_device *dev) wdev_unlock(wdev); } -void cfg80211_sme_rx_auth(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; @@ -333,11 +308,7 @@ void cfg80211_sme_rx_auth(struct net_device *dev, ASSERT_WDEV_LOCK(wdev); - /* should only RX auth frames when connecting */ - if (wdev->sme_state != CFG80211_SME_CONNECTING) - return; - - if (WARN_ON(!wdev->conn)) + if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED) return; if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG && @@ -366,46 +337,226 @@ void cfg80211_sme_rx_auth(struct net_device *dev, wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; schedule_work(&rdev->conn_work); } else if (status_code != WLAN_STATUS_SUCCESS) { - __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, + 
__cfg80211_connect_result(wdev->netdev, mgmt->bssid, + NULL, 0, NULL, 0, status_code, false, NULL); - } else if (wdev->sme_state == CFG80211_SME_CONNECTING && - wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { + } else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; schedule_work(&rdev->conn_work); } } -bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev) +bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) { - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - if (WARN_ON(!wdev->conn)) + if (!wdev->conn) return false; - if (!wdev->conn->prev_bssid_valid) + if (status == WLAN_STATUS_SUCCESS) { + wdev->conn->state = CFG80211_CONN_CONNECTED; return false; + } - /* - * Some stupid APs don't accept reassoc, so we - * need to fall back to trying regular assoc. - */ - wdev->conn->prev_bssid_valid = false; - wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; + if (wdev->conn->prev_bssid_valid) { + /* + * Some stupid APs don't accept reassoc, so we + * need to fall back to trying regular assoc; + * return true so no event is sent to userspace. 
+ */ + wdev->conn->prev_bssid_valid = false; + wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; + schedule_work(&rdev->conn_work); + return true; + } + + wdev->conn->state = CFG80211_CONN_DEAUTH; schedule_work(&rdev->conn_work); + return false; +} - return true; +void cfg80211_sme_deauth(struct wireless_dev *wdev) +{ + cfg80211_sme_free(wdev); } -void cfg80211_sme_failed_assoc(struct wireless_dev *wdev) +void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + cfg80211_sme_free(wdev); +} - wdev->conn->state = CFG80211_CONN_DEAUTH_ASSOC_FAIL; +void cfg80211_sme_disassoc(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_DEAUTH; schedule_work(&rdev->conn_work); } +void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) +{ + cfg80211_sme_disassoc(wdev); +} + +static int cfg80211_sme_connect(struct wireless_dev *wdev, + struct cfg80211_connect_params *connect, + const u8 *prev_bssid) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_bss *bss; + int err; + + if (!rdev->ops->auth || !rdev->ops->assoc) + return -EOPNOTSUPP; + + if (wdev->current_bss) + return -EALREADY; + + if (WARN_ON(wdev->conn)) + return -EINPROGRESS; + + wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); + if (!wdev->conn) + return -ENOMEM; + + /* + * Copy all parameters, and treat explicitly IEs, BSSID, SSID. 
+ */ + memcpy(&wdev->conn->params, connect, sizeof(*connect)); + if (connect->bssid) { + wdev->conn->params.bssid = wdev->conn->bssid; + memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); + } + + if (connect->ie) { + wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, + GFP_KERNEL); + wdev->conn->params.ie = wdev->conn->ie; + if (!wdev->conn->ie) { + kfree(wdev->conn); + wdev->conn = NULL; + return -ENOMEM; + } + } + + if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { + wdev->conn->auto_auth = true; + /* start with open system ... should mostly work */ + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_OPEN_SYSTEM; + } else { + wdev->conn->auto_auth = false; + } + + wdev->conn->params.ssid = wdev->ssid; + wdev->conn->params.ssid_len = connect->ssid_len; + + /* see if we have the bss already */ + bss = cfg80211_get_conn_bss(wdev); + + if (prev_bssid) { + memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); + wdev->conn->prev_bssid_valid = true; + } + + /* we're good if we have a matching bss struct */ + if (bss) { + wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; + err = cfg80211_conn_do_work(wdev); + cfg80211_put_bss(wdev->wiphy, bss); + } else { + /* otherwise we'll need to scan for the AP first */ + err = cfg80211_conn_scan(wdev); + + /* + * If we can't scan right now, then we need to scan again + * after the current scan finished, since the parameters + * changed (unless we find a good AP anyway). 
+ */ + if (err == -EBUSY) { + err = 0; + wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; + } + } + + if (err) + cfg80211_sme_free(wdev); + + return err; +} + +static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int err; + + if (!wdev->conn) + return 0; + + if (!rdev->ops->deauth) + return -EOPNOTSUPP; + + if (wdev->conn->state == CFG80211_CONN_SCANNING || + wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) { + err = 0; + goto out; + } + + /* wdev->conn->params.bssid must be set if > SCANNING */ + err = cfg80211_mlme_deauth(rdev, wdev->netdev, + wdev->conn->params.bssid, + NULL, 0, reason, false); + out: + cfg80211_sme_free(wdev); + return err; +} + +/* + * code shared for in-device and software SME + */ + +static bool cfg80211_is_all_idle(void) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + bool is_all_idle = true; + + /* + * All devices must be idle as otherwise if you are actively + * scanning some new beacon hints could be learned and would + * count as new regulatory hints. 
+ */ + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { + wdev_lock(wdev); + if (wdev->conn || wdev->current_bss) + is_all_idle = false; + wdev_unlock(wdev); + } + } + + return is_all_idle; +} + +static void disconnect_work(struct work_struct *work) +{ + rtnl_lock(); + if (cfg80211_is_all_idle()) + regulatory_hint_disconnect(); + rtnl_unlock(); +} + +static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); + + +/* + * API calls for drivers implementing connect/disconnect and + * SME event handling + */ + void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, @@ -424,9 +575,6 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; - if (wdev->sme_state != CFG80211_SME_CONNECTING) - return; - nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, @@ -463,15 +611,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->current_bss = NULL; } - if (wdev->conn) - wdev->conn->state = CFG80211_CONN_IDLE; - if (status != WLAN_STATUS_SUCCESS) { - wdev->sme_state = CFG80211_SME_IDLE; - if (wdev->conn) - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; kfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; @@ -480,21 +620,16 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } if (!bss) - bss = cfg80211_get_bss(wdev->wiphy, - wdev->conn ? 
wdev->conn->params.channel : - NULL, - bssid, + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (WARN_ON(!bss)) return; cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - wdev->sme_state = CFG80211_SME_CONNECTED; cfg80211_upload_connect_keys(wdev); rcu_read_lock(); @@ -530,8 +665,6 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING); - ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); if (!ev) return; @@ -572,13 +705,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) goto out; - if (wdev->sme_state != CFG80211_SME_CONNECTED) - goto out; - - /* internal error -- how did we get to CONNECTED w/o BSS? */ - if (WARN_ON(!wdev->current_bss)) { + if (WARN_ON(!wdev->current_bss)) goto out; - } cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); @@ -628,8 +756,6 @@ void cfg80211_roamed(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); @@ -651,8 +777,6 @@ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - if (WARN_ON(!bss)) return; @@ -694,25 +818,14 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; - if (wdev->sme_state != CFG80211_SME_CONNECTED) - return; - if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; - 
wdev->sme_state = CFG80211_SME_IDLE; wdev->ssid_len = 0; - if (wdev->conn) { - kfree(wdev->conn->ie); - wdev->conn->ie = NULL; - kfree(wdev->conn); - wdev->conn = NULL; - } - nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); /* @@ -741,8 +854,6 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - ev = kzalloc(sizeof(*ev) + ie_len, gfp); if (!ev) return; @@ -760,6 +871,9 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, } EXPORT_SYMBOL(cfg80211_disconnected); +/* + * API calls for nl80211/wext compatibility code + */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, @@ -767,14 +881,10 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, const u8 *prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_bss *bss = NULL; int err; ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) - return -EALREADY; - if (WARN_ON(wdev->connect_keys)) { kfree(wdev->connect_keys); wdev->connect_keys = NULL; @@ -810,105 +920,22 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, } } - if (!rdev->ops->connect) { - if (!rdev->ops->auth || !rdev->ops->assoc) - return -EOPNOTSUPP; - - if (WARN_ON(wdev->conn)) - return -EINPROGRESS; - - wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); - if (!wdev->conn) - return -ENOMEM; - - /* - * Copy all parameters, and treat explicitly IEs, BSSID, SSID. 
- */ - memcpy(&wdev->conn->params, connect, sizeof(*connect)); - if (connect->bssid) { - wdev->conn->params.bssid = wdev->conn->bssid; - memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); - } + wdev->connect_keys = connkeys; + memcpy(wdev->ssid, connect->ssid, connect->ssid_len); + wdev->ssid_len = connect->ssid_len; - if (connect->ie) { - wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, - GFP_KERNEL); - wdev->conn->params.ie = wdev->conn->ie; - if (!wdev->conn->ie) { - kfree(wdev->conn); - wdev->conn = NULL; - return -ENOMEM; - } - } - - if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { - wdev->conn->auto_auth = true; - /* start with open system ... should mostly work */ - wdev->conn->params.auth_type = - NL80211_AUTHTYPE_OPEN_SYSTEM; - } else { - wdev->conn->auto_auth = false; - } - - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; - wdev->conn->params.ssid = wdev->ssid; - wdev->conn->params.ssid_len = connect->ssid_len; - - /* see if we have the bss already */ - bss = cfg80211_get_conn_bss(wdev); - - wdev->sme_state = CFG80211_SME_CONNECTING; - wdev->connect_keys = connkeys; - - if (prev_bssid) { - memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); - wdev->conn->prev_bssid_valid = true; - } - - /* we're good if we have a matching bss struct */ - if (bss) { - wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; - err = cfg80211_conn_do_work(wdev); - cfg80211_put_bss(wdev->wiphy, bss); - } else { - /* otherwise we'll need to scan for the AP first */ - err = cfg80211_conn_scan(wdev); - /* - * If we can't scan right now, then we need to scan again - * after the current scan finished, since the parameters - * changed (unless we find a good AP anyway). 
- */ - if (err == -EBUSY) { - err = 0; - wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; - } - } - if (err) { - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; - wdev->sme_state = CFG80211_SME_IDLE; - wdev->connect_keys = NULL; - wdev->ssid_len = 0; - } - - return err; - } else { - wdev->sme_state = CFG80211_SME_CONNECTING; - wdev->connect_keys = connkeys; + if (!rdev->ops->connect) + err = cfg80211_sme_connect(wdev, connect, prev_bssid); + else err = rdev_connect(rdev, dev, connect); - if (err) { - wdev->connect_keys = NULL; - wdev->sme_state = CFG80211_SME_IDLE; - return err; - } - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; - - return 0; + if (err) { + wdev->connect_keys = NULL; + wdev->ssid_len = 0; + return err; } + + return 0; } int cfg80211_disconnect(struct cfg80211_registered_device *rdev, @@ -919,78 +946,17 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state == CFG80211_SME_IDLE) - return -EINVAL; - kfree(wdev->connect_keys); wdev->connect_keys = NULL; - if (!rdev->ops->disconnect) { - if (!rdev->ops->deauth) - return -EOPNOTSUPP; - - /* was it connected by userspace SME? 
*/ - if (!wdev->conn) { - cfg80211_mlme_down(rdev, dev); - goto disconnect; - } - - if (wdev->sme_state == CFG80211_SME_CONNECTING && - (wdev->conn->state == CFG80211_CONN_SCANNING || - wdev->conn->state == CFG80211_CONN_SCAN_AGAIN)) { - wdev->sme_state = CFG80211_SME_IDLE; - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; - wdev->ssid_len = 0; - return 0; - } - - /* wdev->conn->params.bssid must be set if > SCANNING */ - err = cfg80211_mlme_deauth(rdev, dev, - wdev->conn->params.bssid, - NULL, 0, reason, false); - if (err) - return err; + if (wdev->conn) { + err = cfg80211_sme_disconnect(wdev, reason); + } else if (!rdev->ops->disconnect) { + cfg80211_mlme_down(rdev, dev); + err = 0; } else { err = rdev_disconnect(rdev, dev, reason); - if (err) - return err; } - disconnect: - if (wdev->sme_state == CFG80211_SME_CONNECTED) - __cfg80211_disconnected(dev, NULL, 0, 0, false); - else if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, NULL, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - wextev, NULL); - - return 0; -} - -void cfg80211_sme_disassoc(struct net_device *dev, - struct cfg80211_internal_bss *bss) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - u8 bssid[ETH_ALEN]; - - ASSERT_WDEV_LOCK(wdev); - - if (!wdev->conn) - return; - - if (wdev->conn->state == CFG80211_CONN_IDLE) - return; - - /* - * Ok, so the association was made by this SME -- we don't - * want it any more so deauthenticate too. 
- */ - - memcpy(bssid, bss->pub.bssid, ETH_ALEN); - - cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); + return err; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index a53f8404f45..14c9a2583ba 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -89,7 +89,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { bool event = true; if (wdev->wext.connect.channel == chan) { @@ -188,7 +188,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = 0; - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { bool event = true; if (wdev->wext.connect.ssid && len && @@ -277,7 +277,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { err = 0; /* both automatic */ if (!bssid && !wdev->wext.connect.bssid) @@ -364,7 +364,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev, wdev->wext.ie = ie; wdev->wext.ie_len = ie_len; - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, false); if (err) -- cgit v1.2.3 From 3430140ad9da9ec1caaf800af6b0378351919f9c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Jun 2013 14:35:07 +0200 Subject: regulatory: use proper enum return value get_reg_request_treatment() returns 0 in one case but is defined to return an enum, use the proper value REG_REQ_OK. 
Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e1d6749234c..5a24c986f34 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1345,7 +1345,7 @@ get_reg_request_treatment(struct wiphy *wiphy, return REG_REQ_OK; return REG_REQ_ALREADY_SET; } - return 0; + return REG_REQ_OK; case NL80211_REGDOM_SET_BY_DRIVER: if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { if (regdom_changes(pending_request->alpha2)) -- cgit v1.2.3 From 17ef66afc0bdbbdc5c526db5e24bdd2dc3df1205 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 31 May 2013 15:05:48 +0000 Subject: net: ipv6: Unify {raw,udp}6_sock_seq_show. udp6_sock_seq_show and raw6_sock_seq_show are identical, except the UDP version displays ports and the raw version displays the protocol. Refactor most of the code in these two functions into a new common ip6_dgram_sock_seq_show function, in preparation for using it to display ICMPv6 sockets as well. Also reduce the indentation in parts of include/net/transp_v6.h to improve readability. Compiles and displays reasonable results with CONFIG_IPV6={n,m,y} Signed-off-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- net/ipv6/datagram.c | 27 +++++++++++++++++++++++++++ net/ipv6/raw.c | 45 ++++++++------------------------------------- net/ipv6/udp.c | 49 +++++++++---------------------------------------- 3 files changed, 44 insertions(+), 77 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4b56cbbc789..197e6f4a2b7 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -879,3 +879,30 @@ exit_f: return err; } EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); + +void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, + __u16 srcp, __u16 destp, int bucket) +{ + struct ipv6_pinfo *np = inet6_sk(sp); + const struct in6_addr *dest, *src; + + dest = &np->daddr; + src = &np->rcv_saddr; + seq_printf(seq, + "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", + bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp, + sp->sk_state, + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, + sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops)); +} diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4f8886aa842..c45f7a5c36e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1227,45 +1227,16 @@ struct proto rawv6_prot = { }; #ifdef CONFIG_PROC_FS -static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) -{ - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = 0; - srcp = inet_sk(sp)->inet_num; - seq_printf(seq, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - i, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], 
dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); -} - static int raw6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + struct sock *sp = v; + __u16 srcp = inet_sk(sp)->inet_num; + ip6_dgram_sock_seq_show(seq, v, srcp, 0, + raw_seq_private(seq)->bucket); + } return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa..b5808539cd5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1359,48 +1359,17 @@ static const struct inet6_protocol udpv6_protocol = { /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS - -static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = ntohs(inet->inet_dport); - srcp = ntohs(inet->inet_sport); - seq_printf(seq, - "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - bucket, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, - 
atomic_read(&sp->sk_drops)); -} - int udp6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct udp_iter_state *)seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } return 0; } -- cgit v1.2.3 From 8cc785f6f429c2a3fb81745dc142cbd72a462c4a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 31 May 2013 15:05:49 +0000 Subject: net: ipv4: make the ping /proc code AF-independent Introduce a ping_seq_afinfo structure (similar to its UDP equivalent) and use it to make some of the ping /proc functions address-family independent. Rename the remaining ping /proc functions from ping_* to ping_v4_*. Compiles and displays reasonable results with CONFIG_IPV6={n,m,y} Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ping.c | 73 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 71f6ad02fa6..8c2da9b8cd8 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1001,7 +1001,8 @@ static struct sock *ping_get_first(struct seq_file *seq, int start) continue; sk_nulls_for_each(sk, node, hslot) { - if (net_eq(sock_net(sk), net)) + if (net_eq(sock_net(sk), net) && + sk->sk_family == state->family) goto found; } } @@ -1034,16 +1035,23 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) return pos ? 
NULL : sk; } -static void *ping_seq_start(struct seq_file *seq, loff_t *pos) +static void *ping_seq_start(struct seq_file *seq, loff_t *pos, + sa_family_t family) { struct ping_iter_state *state = seq->private; state->bucket = 0; + state->family = family; read_lock_bh(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } +static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET); +} + static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; @@ -1062,7 +1070,7 @@ static void ping_seq_stop(struct seq_file *seq, void *v) read_unlock_bh(&ping_table.lock); } -static void ping_format_sock(struct sock *sp, struct seq_file *f, +static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) { struct inet_sock *inet = inet_sk(sp); @@ -1083,7 +1091,7 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, atomic_read(&sp->sk_drops), len); } -static int ping_seq_show(struct seq_file *seq, void *v) +static int ping_v4_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", @@ -1094,22 +1102,23 @@ static int ping_seq_show(struct seq_file *seq, void *v) struct ping_iter_state *state = seq->private; int len; - ping_format_sock(v, seq, state->bucket, &len); + ping_v4_format_sock(v, seq, state->bucket, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } return 0; } -static const struct seq_operations ping_seq_ops = { - .show = ping_seq_show, - .start = ping_seq_start, +static const struct seq_operations ping_v4_seq_ops = { + .show = ping_v4_seq_show, + .start = ping_v4_seq_start, .next = ping_seq_next, .stop = ping_seq_stop, }; static int ping_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ping_seq_ops, + struct ping_seq_afinfo *afinfo = PDE_DATA(inode); + return seq_open_net(inode, file, &afinfo->seq_ops, sizeof(struct ping_iter_state)); } @@ -1120,46 
+1129,58 @@ static const struct file_operations ping_seq_fops = { .release = seq_release_net, }; -static int ping_proc_register(struct net *net) +static struct ping_seq_afinfo ping_v4_seq_afinfo = { + .name = "icmp", + .family = AF_INET, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v4_seq_start, + .show = ping_v4_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; + +static int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) { struct proc_dir_entry *p; - int rc = 0; - - p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops); + p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, + afinfo->seq_fops, afinfo); if (!p) - rc = -ENOMEM; - return rc; + return -ENOMEM; + return 0; } -static void ping_proc_unregister(struct net *net) +static void ping_proc_unregister(struct net *net, + struct ping_seq_afinfo *afinfo) { - remove_proc_entry("icmp", net->proc_net); + remove_proc_entry(afinfo->name, net->proc_net); } -static int __net_init ping_proc_init_net(struct net *net) +static int __net_init ping_v4_proc_init_net(struct net *net) { - return ping_proc_register(net); + return ping_proc_register(net, &ping_v4_seq_afinfo); } -static void __net_exit ping_proc_exit_net(struct net *net) +static void __net_exit ping_v4_proc_exit_net(struct net *net) { - ping_proc_unregister(net); + ping_proc_unregister(net, &ping_v4_seq_afinfo); } -static struct pernet_operations ping_net_ops = { - .init = ping_proc_init_net, - .exit = ping_proc_exit_net, +static struct pernet_operations ping_v4_net_ops = { + .init = ping_v4_proc_init_net, + .exit = ping_v4_proc_exit_net, }; int __init ping_proc_init(void) { - return register_pernet_subsys(&ping_net_ops); + return register_pernet_subsys(&ping_v4_net_ops); } void ping_proc_exit(void) { - unregister_pernet_subsys(&ping_net_ops); + unregister_pernet_subsys(&ping_v4_net_ops); } #endif -- cgit v1.2.3 From d862e546142328d18377a4704be97f2ae301847a Mon Sep 17 00:00:00 2001 From: Lorenzo 
Colitti Date: Fri, 31 May 2013 15:05:50 +0000 Subject: net: ipv6: Implement /proc/net/icmp6. The format is based on /proc/net/icmp and /proc/net/{udp,raw}6. Compiles and displays reasonable results with CONFIG_IPV6={n,m,y} Couldn't figure out how to test without CONFIG_PROC_FS enabled. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ping.c | 21 +++++++----- net/ipv6/ping.c | 102 +++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 91 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8c2da9b8cd8..3552a45a6f8 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1035,8 +1035,7 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : sk; } -static void *ping_seq_start(struct seq_file *seq, loff_t *pos, - sa_family_t family) +void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) { struct ping_iter_state *state = seq->private; state->bucket = 0; @@ -1046,13 +1045,14 @@ static void *ping_seq_start(struct seq_file *seq, loff_t *pos, return *pos ? 
ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } +EXPORT_SYMBOL_GPL(ping_seq_start); static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) { return ping_seq_start(seq, pos, AF_INET); } -static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) +void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; @@ -1064,11 +1064,13 @@ static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } +EXPORT_SYMBOL_GPL(ping_seq_next); -static void ping_seq_stop(struct seq_file *seq, void *v) +void ping_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&ping_table.lock); } +EXPORT_SYMBOL_GPL(ping_seq_stop); static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) @@ -1122,12 +1124,13 @@ static int ping_seq_open(struct inode *inode, struct file *file) sizeof(struct ping_iter_state)); } -static const struct file_operations ping_seq_fops = { +const struct file_operations ping_seq_fops = { .open = ping_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_net, }; +EXPORT_SYMBOL_GPL(ping_seq_fops); static struct ping_seq_afinfo ping_v4_seq_afinfo = { .name = "icmp", @@ -1141,7 +1144,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = { }, }; -static int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) +int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) { struct proc_dir_entry *p; p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, @@ -1150,13 +1153,13 @@ static int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) return -ENOMEM; return 0; } +EXPORT_SYMBOL_GPL(ping_proc_register); -static void ping_proc_unregister(struct net *net, - struct ping_seq_afinfo *afinfo) +void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo) { remove_proc_entry(afinfo->name, net->proc_net); } - +EXPORT_SYMBOL_GPL(ping_proc_unregister); static int __net_init ping_v4_proc_init_net(struct 
net *net) { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a6462d657c1..62ac5f2e0aa 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -78,29 +78,6 @@ int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int __init pingv6_init(void) -{ - pingv6_ops.ipv6_recv_error = ipv6_recv_error; - pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; - pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; - pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; - pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; - return inet6_register_protosw(&pingv6_protosw); -} - -/* This never gets called because it's not possible to unload the ipv6 module, - * but just in case. - */ -void pingv6_exit(void) -{ - pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; - pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; - pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; - pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; - pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; - inet6_unregister_protosw(&pingv6_protosw); -} - int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -214,3 +191,82 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return err; } + +#ifdef CONFIG_PROC_FS +static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET6); +} + +int ping_v6_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct ping_iter_state *) seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } + return 0; +} + +static struct ping_seq_afinfo ping_v6_seq_afinfo = { + .name = "icmp6", + .family = AF_INET6, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v6_seq_start, + .show = ping_v6_seq_show, + 
.next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; + +static int __net_init ping_v6_proc_init_net(struct net *net) +{ + return ping_proc_register(net, &ping_v6_seq_afinfo); +} + +static void __net_init ping_v6_proc_exit_net(struct net *net) +{ + return ping_proc_unregister(net, &ping_v6_seq_afinfo); +} + +static struct pernet_operations ping_v6_net_ops = { + .init = ping_v6_proc_init_net, + .exit = ping_v6_proc_exit_net, +}; +#endif + +int __init pingv6_init(void) +{ +#ifdef CONFIG_PROC_FS + int ret = register_pernet_subsys(&ping_v6_net_ops); + if (ret) + return ret; +#endif + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. + */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; +#ifdef CONFIG_PROC_FS + unregister_pernet_subsys(&ping_v6_net_ops); +#endif + inet6_unregister_protosw(&pingv6_protosw); +} -- cgit v1.2.3 From 256c90dedf538c59c70e65ba1a1340ce793c5b37 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Jun 2013 19:21:08 +0200 Subject: cfg80211: fix potential deadlock regression My big locking cleanups caused a problem by registering the rfkill instance with the RTNL held, while the callback also acquires the RTNL. This potentially causes a deadlock since the two locks used (rfkill mutex and RTNL) can be acquired in two different orders. Fix this by (un)registering rfkill without holding the RTNL. 
This needs to be done after the device struct is registered, but that can also be done w/o holding the RTNL. Signed-off-by: Johannes Berg --- net/wireless/core.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 221e76b53a9..99d86ddb633 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -555,14 +555,18 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - rtnl_lock(); res = device_add(&rdev->wiphy.dev); + if (res) + return res; + + res = rfkill_register(rdev->rfkill); if (res) { - rtnl_unlock(); + device_del(&rdev->wiphy.dev); return res; } + rtnl_lock(); /* set up regulatory info */ wiphy_regulatory_register(wiphy); @@ -589,17 +593,6 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_rdev_add(rdev); - res = rfkill_register(rdev->rfkill); - if (res) { - device_del(&rdev->wiphy.dev); - - debugfs_remove_recursive(rdev->wiphy.debugfsdir); - list_del_rcu(&rdev->list); - wiphy_regulatory_deregister(wiphy); - rtnl_unlock(); - return res; - } - rdev->wiphy.registered = true; rtnl_unlock(); return 0; @@ -636,11 +629,11 @@ void wiphy_unregister(struct wiphy *wiphy) rtnl_unlock(); __count == 0; })); + rfkill_unregister(rdev->rfkill); + rtnl_lock(); rdev->wiphy.registered = false; - rfkill_unregister(rdev->rfkill); - BUG_ON(!list_empty(&rdev->wdev_list)); /* -- cgit v1.2.3 From 9b881963c1c81f965f89a3e89b1aa5557f67ee30 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Jun 2013 22:23:36 +0200 Subject: cfg80211: make wiphy index start at 0 again The change to use atomic_inc_return() for assigning the wiphy index made the first wiphy index 1 instead of 0. This is fine, but we all habitually type "phy0" when we're testing, so make it go back to 0 instead of 1 by subtracting 1 from the index. 
Signed-off-by: Johannes Berg --- net/wireless/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 99d86ddb633..4224e7554a7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -301,6 +301,9 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) return NULL; } + /* atomic_inc_return makes it start at 1, make it start at 0 */ + rdev->wiphy_idx--; + /* give it a proper name */ dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); -- cgit v1.2.3 From ebd4687af732a903f6822bb129d2f3a7d830e798 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 1 Jun 2013 16:23:15 +0000 Subject: xfrm: simplify the exit path of xfrm_output_one() Clean up unnecessary assignment and jump. While there, fix up the label name. Signed-off-by: Jean Sacren Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 0cf003dfa8f..eb4a8428864 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -89,7 +89,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) err = x->type->output(x, skb); if (err == -EINPROGRESS) - goto out_exit; + goto out; resume: if (err) { @@ -107,15 +107,14 @@ resume: x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); - err = 0; + return 0; -out_exit: - return err; error: spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); - goto out_exit; +out: + return err; } int xfrm_output_resume(struct sk_buff *skb, int err) -- cgit v1.2.3 From 4960c2c6fa252d2e90796074427db3f2297b523b Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sat, 1 Jun 2013 16:23:17 +0000 Subject: Kconfig: remove dangling references to the deleted file Commit 202dc3fc599c1dded235d3b448d9ca924252e354 (Documentation: remove obsolete networking/multicast.txt file) deleted the obsolete file. 
After the file has been removed, clean up a couple of places where references to the deleted file were made so that users wouldn't be confused when they consult the Help menu. Signed-off-by: Jean Sacren Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8603ca82710..37cf1a6ea3a 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -9,10 +9,7 @@ config IP_MULTICAST intend to participate in the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. More information about the MBONE is on the WWW at - . Information about the multicast - capabilities of the various network cards is contained in - . For most people, it's - safe to say N. + . For most people, it's safe to say N. config IP_ADVANCED_ROUTER bool "IP: advanced router" @@ -223,10 +220,8 @@ config IP_MROUTE packets that have several destination addresses. It is needed on the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. In order to do that, you would most - likely run the program mrouted. Information about the multicast - capabilities of the various network cards is contained in - . If you haven't heard - about it, you don't need it. + likely run the program mrouted. If you haven't heard about it, you + don't need it. config IP_MROUTE_MULTIPLE_TABLES bool "IP: multicast policy routing" -- cgit v1.2.3 From 430f03cde2fb9596d8b562824471e298a8080df9 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 2 Jun 2013 20:43:55 +0000 Subject: net: mark netdev_create_hash __net_init netdev_create_hash() is only called from netdev_init() which is marked __net_init. Signed-off-by: Baruch Siach Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d4d874a25e4..9c18557f93c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6088,7 +6088,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all, } EXPORT_SYMBOL(netdev_increment_features); -static struct hlist_head *netdev_create_hash(void) +static struct hlist_head * __net_init netdev_create_hash(void) { int i; struct hlist_head *hash; -- cgit v1.2.3 From c26d6b46da3ee86fa8a864347331e5513ca84c2b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Jun 2013 22:43:52 +0000 Subject: ping: always initialize ->sin6_scope_id and ->sin6_flowinfo If we don't need scope id, we should initialize it to zero. Same for ->sin6_flowinfo. Cc: Lorenzo Colitti Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3552a45a6f8..1f1b2dd9027 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -892,12 +892,12 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; + sin6->sin6_flowinfo = 0; if (np->sndflow) sin6->sin6_flowinfo = ip6_flowinfo(ip6); - if (__ipv6_addr_needs_scope_id( - ipv6_addr_type(&sin6->sin6_addr))) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); if (inet6_sk(sk)->rxopt.all) pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); -- cgit v1.2.3 From 600fed5e97afca10356952e334f362e82fc71466 Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Mon, 3 Jun 2013 02:03:34 +0000 Subject: net/ethtool: Fix comment regarding location of dev_ethtool() call Signed-off-by: Yan Burman Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4e6f63ade74..cd23d314d68 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1414,7 +1414,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } -/* The main entry point in this file. Called from net/core/dev.c */ +/* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) { -- cgit v1.2.3 From 525cebedb32a87fa48584bc44e14170beb2c10d1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 3 Jun 2013 11:49:23 +0000 Subject: pktgen: Fix position of ip and udp header skb_set_network_header() expects an offset based on the data pointer whereas skb_tail_offset() also includes the headroom. This resulted in the ip header being written in a wrong location. Use return values of skb_put() directly and rely on skb->len to set mac, network, and transport header. Cc: Simon Horman Cc: Daniel Borkmann Assisted-by: Daniel Borkmann Signed-off-by: Thomas Graf Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/core/pktgen.c | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d2ede89662b..303412d8332 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2642,7 +2642,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ u16 queue_map; - unsigned long tail_offset; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2709,20 +2708,15 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IP); } - tail_offset = skb_tail_offset(skb); - if (tail_offset > 0xffff) { - kfree_skb(skb); - return NULL; - } - skb_set_network_header(skb, tail_offset); - skb->transport_header = skb->network_header + sizeof(struct iphdr); - skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ip_hdr(skb); - udph = udp_hdr(skb); - memcpy(eth, pkt_dev->hh, 12); *(__be16 *) & eth[12] = protocol; @@ -2752,8 +2746,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = 0; iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; pktgen_finalize_skb(pkt_dev, skb, datalen); @@ -2781,7 +2773,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 
*svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ u16 queue_map; - unsigned long tail_offset; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2829,18 +2820,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - tail_offset = skb_tail_offset(skb); - if (tail_offset > 0xffff) { - kfree_skb(skb); - return NULL; - } - skb_set_network_header(skb, tail_offset); - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ipv6_hdr(skb); - udph = udp_hdr(skb); memcpy(eth, pkt_dev->hh, 12); *(__be16 *) &eth[12] = protocol; @@ -2875,8 +2862,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, iph->daddr = pkt_dev->cur_in6_daddr; iph->saddr = pkt_dev->cur_in6_saddr; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; -- cgit v1.2.3 From 00f97da17a0c8d656d0c9a60b1d7f38735f69817 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 3 Jun 2013 16:31:36 +0000 Subject: netpoll: fix position of network header Similar to the problem in pktgen, netpoll uses skb_tail_offset() too, as the code is copied from pktgen. Also use return values of skb_put() directly, this will simplify the code. Reported-by: Thomas Graf Cc: Thomas Graf Cc: Daniel Borkmann Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/core/netpoll.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 688517c7ff1..03c8ec3edc7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -676,8 +676,6 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo spin_lock_irqsave(&npinfo->rx_lock, flags); list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - unsigned long tail_offset; - if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) continue; @@ -691,30 +689,20 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo send_skb->dev = skb->dev; skb_reset_network_header(send_skb); - skb_put(send_skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(send_skb); - + hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = 255; hdr->saddr = *saddr; hdr->daddr = *daddr; - tail_offset = skb_tail_offset(skb); - if (tail_offset > 0xffff) { - kfree_skb(send_skb); - continue; - } - skb_set_network_header(send_skb, tail_offset); - skb_put(send_skb, size); - - icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; icmp6h->icmp6_router = 0; icmp6h->icmp6_solicited = 1; - target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); + + target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); *target = msg->target; icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, IPPROTO_ICMPV6, -- cgit v1.2.3 From 482a9c74fa17c5d584995c19e1a36eaf710d1193 Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Mon, 3 Jun 2013 17:29:33 +0300 Subject: mac80211: fix powersave bug and clean up ieee80211_rx_bss_info ieee80211_rx_bss_info() deals with dtim_period setting and PS 
update when associated. Move all of these to other locations, cleaning up this function. Also, the current implementation is buggy because when it calls ieee80211_recalc_ps() bss_conf->dtim_period is not set properly yet and thus nothing will happen. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index df8170a80a5..aa5cd2e138b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2737,24 +2737,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, int freq; struct ieee80211_bss *bss; struct ieee80211_channel *channel; - bool need_ps = false; sdata_assert_lock(sdata); - if ((sdata->u.mgd.associated && - ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || - (sdata->u.mgd.assoc_data && - ether_addr_equal(mgmt->bssid, - sdata->u.mgd.assoc_data->bss->bssid))) { - /* not previously set so we may need to recalc */ - need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; - - if (elems->tim && !elems->parse_error) { - const struct ieee80211_tim_ie *tim_ie = elems->tim; - sdata->u.mgd.dtim_period = tim_ie->dtim_period; - } - } - if (elems->ds_params) freq = ieee80211_channel_to_frequency(elems->ds_params[0], rx_status->band); @@ -2775,12 +2760,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) return; - if (need_ps) { - mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); - mutex_unlock(&local->iflist_mtx); - } - ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems, true); @@ -2894,6 +2873,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, len - baselen, false, &elems); ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); + if (elems.tim && !elems.parse_error) { + const struct 
ieee80211_tim_ie *tim_ie = elems.tim; + ifmgd->dtim_period = tim_ie->dtim_period; + } ifmgd->assoc_data->have_beacon = true; ifmgd->assoc_data->need_beacon = false; if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { @@ -3096,6 +3079,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } changed |= BSS_CHANGED_DTIM_PERIOD; + + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_ps(local, -1); + mutex_unlock(&local->iflist_mtx); + ieee80211_recalc_ps_vif(sdata); } -- cgit v1.2.3 From 989c6505cdda587f87573bb6828f23964dd3d19b Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Thu, 16 May 2013 17:34:17 +0300 Subject: mac80211: Use suitable semantics for beacon availability indication Currently beacon availability upon association is marked by have_beacon flag of assoc_data structure that becomes unavailable when association completes. However beacon availability indication is required also after association to inform a driver. Currently dtim_period parameter is used for this purpose. Move have_beacon flag to another structure, persistent throughout an interface's life cycle. Use suitable semantics for beacon availability indication. 
Signed-off-by: Alexander Bondar [fix another instance of BSS_CHANGED_DTIM_PERIOD in docs] Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/mlme.c | 20 +++++++++++--------- net/mac80211/util.c | 5 +++-- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9eed6f1d161..7a6f1a0207e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -366,7 +366,7 @@ struct ieee80211_mgd_assoc_data { u8 ssid_len; u8 supp_rates_len; bool wmm, uapsd; - bool have_beacon, need_beacon; + bool need_beacon; bool synced; bool timeout_started; @@ -404,6 +404,7 @@ struct ieee80211_if_managed { bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ + bool have_beacon; u8 dtim_period; enum ieee80211_smps_mode req_smps, /* requested smps mode */ driver_smps_mode; /* smps mode request */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aa5cd2e138b..ad9bb9e10cb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1360,7 +1360,7 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL)) return false; - if (!sdata->vif.bss_conf.dtim_period) + if (!mgd->have_beacon) return false; rcu_read_lock(); @@ -1771,7 +1771,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - if (sdata->u.mgd.assoc_data->have_beacon) { + if (sdata->u.mgd.have_beacon) { /* * If the AP is buggy we may get here with no DTIM period * known, so assume it's 1 which is the only safe assumption @@ -1779,7 +1779,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, * probably just won't work at all. 
*/ bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; - bss_info_changed |= BSS_CHANGED_DTIM_PERIOD; + bss_info_changed |= BSS_CHANGED_BEACON_INFO; } else { bss_conf->dtim_period = 0; } @@ -1903,6 +1903,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.chswitch_timer); sdata->vif.bss_conf.dtim_period = 0; + ifmgd->have_beacon = false; ifmgd->flags = 0; ieee80211_vif_release_channel(sdata); @@ -2877,7 +2878,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, const struct ieee80211_tim_ie *tim_ie = elems.tim; ifmgd->dtim_period = tim_ie->dtim_period; } - ifmgd->assoc_data->have_beacon = true; + ifmgd->have_beacon = true; ifmgd->assoc_data->need_beacon = false; if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { sdata->vif.bss_conf.sync_tsf = @@ -3059,7 +3060,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, * If we haven't had a beacon before, tell the driver about the * DTIM period (and beacon timing if desired) now. 
*/ - if (!bss_conf->dtim_period) { + if (!ifmgd->have_beacon) { /* a few bogus AP send dtim_period = 0 or no TIM IE */ if (elems.tim) bss_conf->dtim_period = elems.tim->dtim_period ?: 1; @@ -3078,7 +3079,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.sync_dtim_count = 0; } - changed |= BSS_CHANGED_DTIM_PERIOD; + changed |= BSS_CHANGED_BEACON_INFO; + ifmgd->have_beacon = true; mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local, -1); @@ -3424,8 +3426,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { - if ((ifmgd->assoc_data->need_beacon && - !ifmgd->assoc_data->have_beacon) || + if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) || ieee80211_do_assoc(sdata)) { u8 bssid[ETH_ALEN]; @@ -4193,6 +4194,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_data = assoc_data; ifmgd->dtim_period = 0; + ifmgd->have_beacon = false; err = ieee80211_prep_connection(sdata, req->bss, true); if (err) @@ -4224,7 +4226,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->dtim_period = tim->dtim_period; dtim_count = tim->dtim_count; } - assoc_data->have_beacon = true; + ifmgd->have_beacon = true; assoc_data->timeout = jiffies; assoc_data->timeout_started = true; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 89a83770d15..5a6c1351d1d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1584,8 +1584,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_ARP_FILTER | BSS_CHANGED_PS; - if (sdata->u.mgd.dtim_period) - changed |= BSS_CHANGED_DTIM_PERIOD; + /* Re-send beacon info report to the driver */ + if (sdata->u.mgd.have_beacon) + changed |= BSS_CHANGED_BEACON_INFO; sdata_lock(sdata); ieee80211_bss_info_change_notify(sdata, changed); -- cgit v1.2.3 From 780b40df12cf0161d8ccc5381940e04584793933 Mon Sep 17 
00:00:00 2001 From: Johannes Berg Date: Wed, 5 Jun 2013 09:32:50 +0200 Subject: wireless: fix kernel-doc Some kernel-doc fixes for forgotten fields and renamed things. Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 41c28b977f7..bd12fc54266 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -297,6 +297,9 @@ struct sta_ampdu_mlme { * @rcu_head: RCU head used for freeing this station struct * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification + * @chains: chains ever used for RX from this station + * @chain_signal_last: last signal (per chain) + * @chain_signal_avg: signal average (per chain) */ struct sta_info { /* General information, mostly static */ -- cgit v1.2.3 From 9cefbbc9c8f9abe0bc514dcfca46e8051ee84050 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 4 Jun 2013 22:22:15 +0000 Subject: netfilter: nfnetlink_queue: cleanup copy_range usage For every packet queued, we check if configured copy_range is 0, and treat that as 'copy entire packet'. We can move this check to the queue configuration, and can set copy_range appropriately. Also, convert repetitive '0xffff - NLA_HDRLEN' to a macro. 
[ queue initialization still used 0xffff, although its harmless since the initial setting is overwritten on queue config ] Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index cff4449f01d..3c4218141d7 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -41,6 +41,14 @@ #define NFQNL_QMAX_DEFAULT 1024 +/* We're using struct nlattr which has 16bit nla_len. Note that nla_len + * includes the header length. Thus, the maximum packet length that we + * support is 65531 bytes. We send truncated packets if the specified length + * is larger than that. Userspace can check for presence of NFQA_CAP_LEN + * attribute to detect truncation. + */ +#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN) + struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; @@ -122,7 +130,7 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xffff; + inst->copy_range = NFQNL_MAX_COPY_RANGE; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); @@ -333,10 +341,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, return NULL; data_len = ACCESS_ONCE(queue->copy_range); - if (data_len == 0 || data_len > entskb->len) + if (data_len > entskb->len) data_len = entskb->len; - if (!entskb->head_frag || skb_headlen(entskb) < L1_CACHE_BYTES || skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) @@ -727,13 +734,8 @@ nfqnl_set_mode(struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: queue->copy_mode = mode; - /* We're using struct nlattr which has 16bit nla_len. 
Note that - * nla_len includes the header length. Thus, the maximum packet - * length that we support is 65531 bytes. We send truncated - * packets if the specified length is larger than that. - */ - if (range > 0xffff - NLA_HDRLEN) - queue->copy_range = 0xffff - NLA_HDRLEN; + if (range == 0 || range > NFQNL_MAX_COPY_RANGE) + queue->copy_range = NFQNL_MAX_COPY_RANGE; else queue->copy_range = range; break; -- cgit v1.2.3 From 7f87712c0152511a1842698ad8dca425fee2dc4f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 4 Jun 2013 22:22:16 +0000 Subject: netfilter: nfnetlink_queue: only add CAP_LEN attr when needed CAP_LEN contains the size of the network packet we're queueing to userspace, i.e. normally it is the same as the NFQA_PAYLOAD attribute len. Include it only in the unlikely case when NFQA_PAYLOAD is truncated due to copy_range limitations. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 3c4218141d7..eb2cde836b9 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -472,7 +472,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; - if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + if (cap_len > data_len && + nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; if (nfqnl_put_packet_info(skb, entskb)) -- cgit v1.2.3 From 4c4d41f200db375b2d2cc6d0a1de0606c8266398 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Thu, 6 Jun 2013 10:15:54 +0800 Subject: xfrm: add LINUX_MIB_XFRMACQUIREERROR statistic counter When host ping its peer, ICMP echo request packet triggers IPsec policy, then host negotiates SA secret with its peer. 
After IKE installed SA for OUT direction, but before SA for IN direction installed, host get ICMP echo reply from its peer. At the time being, the SA state for IN direction could be XFRM_STATE_ACQ, then the received packet will be dropped after adding LINUX_MIB_XFRMINSTATEINVALID statistic. Adding a LINUX_MIB_XFRMACQUIREERROR statistic counter for such scenario when SA in larval state is much clearer for user than LINUX_MIB_XFRMINSTATEINVALID which indicates the SA is totally bad. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 5 +++++ net/xfrm/xfrm_proc.c | 1 + 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ab2bb42fe09..88843996f93 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -163,6 +163,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; spin_lock(&x->lock); + if (unlikely(x->km.state == XFRM_STATE_ACQ)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + goto drop_unlock; + } + if (unlikely(x->km.state != XFRM_STATE_VALID)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); goto drop_unlock; diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index c721b0d9ab8..80cd1e55b83 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -44,6 +44,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), + SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_SENTINEL }; -- cgit v1.2.3 From a76580fbf09e6e19c2040c08969af5137e064eda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 May 2013 23:00:18 -0400 Subject: SUNRPC: Fix a potential race in rpc_execute If the rpc_task is asynchronous, it could theoretically finish executing on the workqueue it was 
assigned by rpc_make_runnable() before we get round to testing RPC_IS_ASYNC() in rpc_execute. In practice, however, all the existing callers hold a reference to the rpc_task, so this can't happen today... Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5356b120dbf..849ca413522 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -825,9 +825,11 @@ static void __rpc_execute(struct rpc_task *task) */ void rpc_execute(struct rpc_task *task) { + bool is_async = RPC_IS_ASYNC(task); + rpc_set_active(task); rpc_make_runnable(task); - if (!RPC_IS_ASYNC(task)) + if (!is_async) __rpc_execute(task); } -- cgit v1.2.3 From 0053a8e65c0b949fd230488e5be871755f3f860f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 May 2013 12:51:32 -0400 Subject: SUNRPC: Remove unused function rpc_queue_empty Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 849ca413522..dcbd69cb1cb 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -445,20 +445,6 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r } } -/* - * Tests whether rpc queue is empty - */ -int rpc_queue_empty(struct rpc_wait_queue *queue) -{ - int res; - - spin_lock_bh(&queue->lock); - res = queue->qlen; - spin_unlock_bh(&queue->lock); - return res == 0; -} -EXPORT_SYMBOL_GPL(rpc_queue_empty); - /* * Wake up a task on a specific queue */ -- cgit v1.2.3 From 9ec2ef53b92fdbb1b5f24af000fc2ba0b18221ea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 May 2013 18:52:18 -0400 Subject: SUNRPC: Remove redundant call to rpc_set_running() in __rpc_execute() The RPC_TASK_RUNNING flag will always have been set in rpc_make_runnable() once we get past the test for out_of_line_wait_on_bit() returning ERESTARTSYS. 
Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index dcbd69cb1cb..b7b32c34c18 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -790,7 +790,6 @@ static void __rpc_execute(struct rpc_task *task) task->tk_flags |= RPC_TASK_KILLED; rpc_exit(task, -ERESTARTSYS); } - rpc_set_running(task); dprintk("RPC: %5u sync task resuming\n", task->tk_pid); } -- cgit v1.2.3 From 5ee98591577aa63dbb9e78a0d142abc86b9063d0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Jun 2013 05:11:45 +0000 Subject: net: minor: tcp: use tcp_skb_mss helper in tcp_tso_segment We have the minimal inline helper tcp_skb_mss to access skb_shinfo(skb)->gso_size, so also use it here to get mss. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b5d4ad98805..6a1cf95abc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2905,7 +2905,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, oldlen = (u16)~skb->len; __skb_pull(skb, thlen); - mss = skb_shinfo(skb)->gso_size; + mss = tcp_skb_mss(skb); if (unlikely(skb->len <= mss)) goto out; @@ -3071,7 +3071,7 @@ found: flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); - mss = skb_shinfo(p)->gso_size; + mss = tcp_skb_mss(p); flush |= (len - 1) >= mss; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); -- cgit v1.2.3 From 28850dc7c71da9d0c0e39246e9ff6913f41f8d0a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Jun 2013 05:11:46 +0000 Subject: net: tcp: move GRO/GSO functions to tcp_offload Would be good to make things explicit and move those functions to a new file called tcp_offload.c, thus make this similar to tcpv6_offload.c. 
While moving all related functions into tcp_offload.c, we can also make some of them static, since they are only used there. Also, add an explicit registration function. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/Makefile | 2 +- net/ipv4/af_inet.c | 13 +- net/ipv4/tcp.c | 241 ----------------------------------- net/ipv4/tcp_ipv4.c | 66 +--------- net/ipv4/tcp_offload.c | 332 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 336 insertions(+), 318 deletions(-) create mode 100644 net/ipv4/tcp_offload.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 089cb9f3638..4d3e138c564 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - datagram.o raw.o udp.o udplite.o \ + tcp_offload.o datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c090c7daea..7b514290efc 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1559,15 +1559,6 @@ static const struct net_protocol tcp_protocol = { .netns_ok = 1, }; -static const struct net_offload tcp_offload = { - .callbacks = { - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - }, -}; - static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, @@ -1683,8 +1674,8 @@ static int __init ipv4_offload_init(void) */ if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); - if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) - pr_crit("%s: Cannot add TCP 
protocol offlaod\n", __func__); + if (tcpv4_offload_init() < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); return 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6a1cf95abc9..bc4246940f6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2877,247 +2877,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct tcphdr *th; - unsigned int thlen; - unsigned int seq; - __be32 delta; - unsigned int oldlen; - unsigned int mss; - struct sk_buff *gso_skb = skb; - __sum16 newcheck; - bool ooo_okay, copy_destructor; - - if (!pskb_may_pull(skb, sizeof(*th))) - goto out; - - th = tcp_hdr(skb); - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - if (!pskb_may_pull(skb, thlen)) - goto out; - - oldlen = (u16)~skb->len; - __skb_pull(skb, thlen); - - mss = tcp_skb_mss(skb); - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. 
*/ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_MPLS | - SKB_GSO_UDP_TUNNEL | - 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - copy_destructor = gso_skb->destructor == tcp_wfree; - ooo_okay = gso_skb->ooo_okay; - /* All segments but the first should have ooo_okay cleared */ - skb->ooo_okay = 0; - - segs = skb_segment(skb, features); - if (IS_ERR(segs)) - goto out; - - /* Only first segment might have ooo_okay set */ - segs->ooo_okay = ooo_okay; - - delta = htonl(oldlen + (thlen + mss)); - - skb = segs; - th = tcp_hdr(skb); - seq = ntohl(th->seq); - - newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - - do { - th->fin = th->psh = 0; - th->check = newcheck; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = - csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - - seq += mss; - if (copy_destructor) { - skb->destructor = gso_skb->destructor; - skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. 
- */ - skb->truesize = mss; - gso_skb->truesize -= mss; - } - skb = skb->next; - th = tcp_hdr(skb); - - th->seq = htonl(seq); - th->cwr = 0; - } while (skb->next); - - /* Following permits TCP Small Queues to work well with GSO : - * The callback to TCP stack will be called at the time last frag - * is freed at TX completion, and not right now when gso_skb - * is freed by GSO engine - */ - if (copy_destructor) { - swap(gso_skb->sk, skb->sk); - swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); - } - - delta = htonl(oldlen + (skb_tail_pointer(skb) - - skb_transport_header(skb)) + - skb->data_len); - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - -out: - return segs; -} -EXPORT_SYMBOL(tcp_tso_segment); - -struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - struct sk_buff **pp = NULL; - struct sk_buff *p; - struct tcphdr *th; - struct tcphdr *th2; - unsigned int len; - unsigned int thlen; - __be32 flags; - unsigned int mss = 1; - unsigned int hlen; - unsigned int off; - int flush = 1; - int i; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*th); - th = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - hlen = off + thlen; - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - skb_gro_pull(skb, thlen); - - len = skb_gro_len(skb); - flags = tcp_flag_word(th); - - for (; (p = *head); head = &p->next) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - th2 = tcp_hdr(p); - - if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - - goto found; - } - - 
goto out_check_final; - -found: - flush = NAPI_GRO_CB(p)->flush; - flush |= (__force int)(flags & TCP_FLAG_CWR); - flush |= (__force int)((flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); - flush |= (__force int)(th->ack_seq ^ th2->ack_seq); - for (i = sizeof(*th); i < thlen; i += 4) - flush |= *(u32 *)((u8 *)th + i) ^ - *(u32 *)((u8 *)th2 + i); - - mss = tcp_skb_mss(p); - - flush |= (len - 1) >= mss; - flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); - - if (flush || skb_gro_receive(head, skb)) { - mss = 1; - goto out_check_final; - } - - p = *head; - th2 = tcp_hdr(p); - tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); - -out_check_final: - flush = len < mss; - flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | - TCP_FLAG_RST | TCP_FLAG_SYN | - TCP_FLAG_FIN)); - - if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) - pp = head; - -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} -EXPORT_SYMBOL(tcp_gro_receive); - -int tcp_gro_complete(struct sk_buff *skb) -{ - struct tcphdr *th = tcp_hdr(skb); - - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - - if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - - return 0; -} -EXPORT_SYMBOL(tcp_gro_complete); - #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; static DEFINE_MUTEX(tcp_md5sig_mutex); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d20ede0c959..289039b4d8d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -545,8 +545,7 @@ out: sock_put(sk); } -static void __tcp_v4_send_check(struct sk_buff *skb, - __be32 saddr, __be32 daddr) +void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct tcphdr *th = tcp_hdr(skb); @@ -571,23 +570,6 @@ void tcp_v4_send_check(struct sock *sk, struct 
sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); -int tcp_v4_gso_send_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - iph = ip_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v4_send_check(skb, iph->saddr, iph->daddr); - return 0; -} - /* * This routine will send an RST to the other tcp. * @@ -2795,52 +2777,6 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - __sum16 sum; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } -flush: - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - - return tcp_gro_receive(head, skb); -} - -int tcp4_gro_complete(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), - iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - - return tcp_gro_complete(skb); -} - struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c new file mode 100644 index 00000000000..3a7525e6c08 --- /dev/null +++ b/net/ipv4/tcp_offload.c @@ -0,0 +1,332 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * 
as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TCPv4 GSO/GRO support + */ + +#include +#include +#include + +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct tcphdr *th; + unsigned int thlen; + unsigned int seq; + __be32 delta; + unsigned int oldlen; + unsigned int mss; + struct sk_buff *gso_skb = skb; + __sum16 newcheck; + bool ooo_okay, copy_destructor; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + oldlen = (u16)~skb->len; + __skb_pull(skb, thlen); + + mss = tcp_skb_mss(skb); + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + SKB_GSO_GRE | + SKB_GSO_MPLS | + SKB_GSO_UDP_TUNNEL | + 0) || + !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + copy_destructor = gso_skb->destructor == tcp_wfree; + ooo_okay = gso_skb->ooo_okay; + /* All segments but the first should have ooo_okay cleared */ + skb->ooo_okay = 0; + + segs = skb_segment(skb, features); + if (IS_ERR(segs)) + goto out; + + /* Only first segment might have ooo_okay set */ + segs->ooo_okay = ooo_okay; + + delta = htonl(oldlen + (thlen + mss)); + + skb = segs; + th = tcp_hdr(skb); + seq = ntohl(th->seq); + + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + + do { + th->fin = th->psh = 0; + th->check = newcheck; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = + 
csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); + + seq += mss; + if (copy_destructor) { + skb->destructor = gso_skb->destructor; + skb->sk = gso_skb->sk; + /* {tcp|sock}_wfree() use exact truesize accounting : + * sum(skb->truesize) MUST be exactly be gso_skb->truesize + * So we account mss bytes of 'true size' for each segment. + * The last segment will contain the remaining. + */ + skb->truesize = mss; + gso_skb->truesize -= mss; + } + skb = skb->next; + th = tcp_hdr(skb); + + th->seq = htonl(seq); + th->cwr = 0; + } while (skb->next); + + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (copy_destructor) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + + delta = htonl(oldlen + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + + skb->data_len); + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); +out: + return segs; +} +EXPORT_SYMBOL(tcp_tso_segment); + +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int len; + unsigned int thlen; + __be32 flags; + unsigned int mss = 1; + unsigned int hlen; + unsigned int off; + int flush = 1; + int i; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = 
skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + skb_gro_pull(skb, thlen); + + len = skb_gro_len(skb); + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= (__force int)(flags & TCP_FLAG_CWR); + flush |= (__force int)((flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); + for (i = sizeof(*th); i < thlen; i += 4) + flush |= *(u32 *)((u8 *)th + i) ^ + *(u32 *)((u8 *)th2 + i); + + mss = tcp_skb_mss(p); + + flush |= (len - 1) >= mss; + flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = len < mss; + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | + TCP_FLAG_RST | TCP_FLAG_SYN | + TCP_FLAG_FIN)); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(tcp_gro_receive); + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} +EXPORT_SYMBOL(tcp_gro_complete); + +static int tcp_v4_gso_send_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + iph = ip_hdr(skb); 
+ th = tcp_hdr(skb); + + th->check = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v4_send_check(skb, iph->saddr, iph->daddr); + return 0; +} + +static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + + case CHECKSUM_NONE: + wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), IPPROTO_TCP, 0); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + return tcp_gro_receive(head, skb); +} + +static int tcp4_gro_complete(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} + +static const struct net_offload tcpv4_offload = { + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, +}; + +int __init tcpv4_offload_init(void) +{ + return inet_add_offload(&tcpv4_offload, IPPROTO_TCP); +} -- cgit v1.2.3 From c05cdb1b864f548c0c3d8ae3b51264e6739a69b1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 3 Jun 2013 09:46:28 +0000 Subject: netlink: allow large data transfers from user-space I can hit ENOBUFS in the sendmsg() path with a large batch that is composed of many netlink messages. Here that limit is 8 MBytes of skbuff data area as kmalloc does not manage to get more than that. 
While discussing atomic rule-set for nftables with Patrick McHardy, we decided to put all rule-set updates that need to be applied atomically in one single batch to simplify the existing approach. However, as explained above, the existing netlink code limits us to a maximum of ~20000 rules that fit in one single batch without hitting ENOBUFS. iptables does not have such limitation as it is using vmalloc. This patch adds netlink_alloc_large_skb() which is only used in the netlink_sendmsg() path. It uses alloc_skb if the memory requested is <= one memory page, that should be the common case for most subsystems, else vmalloc for higher memory allocations. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d0b3dd60d38..68c16737439 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -750,6 +750,10 @@ static void netlink_skb_destructor(struct sk_buff *skb) skb->head = NULL; } #endif + if (is_vmalloc_addr(skb->head)) { + vfree(skb->head); + skb->head = NULL; + } if (skb->sk != NULL) sock_rfree(skb); } @@ -1420,6 +1424,35 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } +static struct sk_buff *netlink_alloc_large_skb(unsigned int size) +{ + struct sk_buff *skb; + void *data; + + if (size <= NLMSG_GOODSIZE) + return alloc_skb(size, GFP_KERNEL); + + skb = alloc_skb_head(GFP_KERNEL); + if (skb == NULL) + return NULL; + + data = vmalloc(size); + if (data == NULL) + goto err; + + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; + skb->len = 0; + skb->destructor = netlink_skb_destructor; + + return skb; +err: + kfree_skb(skb); + return NULL; +} + /* * Attach a skb to a netlink socket. * The caller must hold a reference to the destination socket. 
On error, the @@ -1510,7 +1543,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) return skb; delta = skb->end - skb->tail; - if (delta * 2 < skb->truesize) + if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; if (skb_shared(skb)) { @@ -2096,7 +2129,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = alloc_skb(len, GFP_KERNEL); + skb = netlink_alloc_large_skb(len); if (skb == NULL) goto out; -- cgit v1.2.3 From af12fa6e46aa651e7b86a4c4117b562518fef184 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:39:41 +0300 Subject: net: add napi_id and hash Adds a napi_id and a hashing mechanism to lookup a napi by id. This will be used by subsequent patches to implement low latency Ethernet device polling. Based on a code sample by Eric Dumazet. Signed-off-by: Eliezer Tamir Signed-off-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- net/core/dev.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9c18557f93c..fa007dba6be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -166,6 +167,12 @@ static struct list_head offload_base __read_mostly; DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +/* protects napi_hash addition/deletion and napi_gen_id */ +static DEFINE_SPINLOCK(napi_hash_lock); + +static unsigned int napi_gen_id; +static DEFINE_HASHTABLE(napi_hash, 8); + seqcount_t devnet_rename_seq; static inline void dev_base_seq_inc(struct net *net) @@ -4136,6 +4143,58 @@ void napi_complete(struct napi_struct *n) } EXPORT_SYMBOL(napi_complete); +/* must be called under rcu_read_lock(), as we dont take a reference */ +struct napi_struct *napi_by_id(unsigned int napi_id) +{ + unsigned int hash = napi_id % HASH_SIZE(napi_hash); + struct napi_struct *napi; + + hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) + if (napi->napi_id == napi_id) + return napi; + + return NULL; +} +EXPORT_SYMBOL_GPL(napi_by_id); + +void napi_hash_add(struct napi_struct *napi) +{ + if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { + + spin_lock(&napi_hash_lock); + + /* 0 is not a valid id, we also skip an id that is taken + * we expect both events to be extremely rare + */ + napi->napi_id = 0; + while (!napi->napi_id) { + napi->napi_id = ++napi_gen_id; + if (napi_by_id(napi->napi_id)) + napi->napi_id = 0; + } + + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + + spin_unlock(&napi_hash_lock); + } +} +EXPORT_SYMBOL_GPL(napi_hash_add); + +/* Warning : caller is responsible to make sure rcu grace period + * is respected before freeing memory containing @napi + */ +void napi_hash_del(struct napi_struct *napi) +{ + spin_lock(&napi_hash_lock); + 
+ if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + hlist_del_rcu(&napi->napi_hash_node); + + spin_unlock(&napi_hash_lock); +} +EXPORT_SYMBOL_GPL(napi_hash_del); + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { -- cgit v1.2.3 From 060212928670593fb89243640bf05cf89560b023 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:39:50 +0300 Subject: net: add low latency socket poll Adds an ndo_ll_poll method and the code that supports it. This method can be used by low latency applications to busy-poll Ethernet device queues directly from the socket code. sysctl_net_ll_poll controls how many microseconds to poll. Default is zero (disabled). Individual protocol support will be added by subsequent patches. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/Kconfig | 12 ++++++++++++ net/core/skbuff.c | 4 ++++ net/core/sock.c | 6 ++++++ net/core/sysctl_net_core.c | 10 ++++++++++ net/ipv4/proc.c | 1 + net/socket.c | 6 ++++++ 6 files changed, 39 insertions(+) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 523e43e6da1..d6a9ce6e180 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -243,6 +243,18 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis +config NET_LL_RX_POLL + bool "Low Latency Receive Poll" + depends on X86_TSC + default n + ---help--- + Support Low Latency Receive Queue Poll. + (For network card drivers which support this option.) + When waiting for data in read or poll call directly into the the device driver + to flush packets which may be pending on the device queues into the stack. + + If unsure, say N. 
+ config BQL boolean depends on SYSFS diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 73f57a0e152..4a4181e16c1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -733,6 +733,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->vlan_tci = old->vlan_tci; skb_copy_secmark(new, old); + +#ifdef CONFIG_NET_LL_RX_POLL + new->napi_id = old->napi_id; +#endif } /* diff --git a/net/core/sock.c b/net/core/sock.c index 88868a9d21d..788c0da5eed 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,6 +139,8 @@ #include #endif +#include + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -2284,6 +2286,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); +#ifdef CONFIG_NET_LL_RX_POLL + sk->sk_napi_id = 0; +#endif + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 741db5fc780..4b48f39582b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -19,6 +19,7 @@ #include #include #include +#include static int one = 1; @@ -284,6 +285,15 @@ static struct ctl_table net_core_table[] = { .proc_handler = flow_limit_table_len_sysctl }, #endif /* CONFIG_NET_FLOW_LIMIT */ +#ifdef CONFIG_NET_LL_RX_POLL + { + .procname = "low_latency_poll", + .data = &sysctl_net_ll_poll, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax + }, +#endif #endif /* CONFIG_NET */ { .procname = "netdev_budget", diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2a5bf86d241..6577a1149a4 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), 
SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), + SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), SNMP_MIB_SENTINEL }; diff --git a/net/socket.c b/net/socket.c index 3ebdcb805c5..21fd29f63ed 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,6 +104,12 @@ #include #include #include +#include + +#ifdef CONFIG_NET_LL_RX_POLL +unsigned long sysctl_net_ll_poll __read_mostly; +EXPORT_SYMBOL_GPL(sysctl_net_ll_poll); +#endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, -- cgit v1.2.3 From a5b50476f77a8fcc8055c955720d05a7c2d9c532 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:40:00 +0300 Subject: udp: add low latency socket poll support Add support for busy-polling on UDP sockets. In __udp[46]_lib_rcv add a call to sk_mark_ll() to copy the napi_id from the skb into the sk. This is done at the earliest possible moment, right after we identify which socket this skb is for. In __skb_recv_datagram, when there is no data and the user tries to read, we busy poll. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/datagram.c | 4 ++++ net/ipv4/udp.c | 6 +++++- net/ipv6/udp.c | 6 +++++- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index b71423db778..9cbaba98ce4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -56,6 +56,7 @@ #include #include #include +#include /* * Is a socket 'connection oriented' ?
@@ -207,6 +208,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); + if (sk_valid_ll(sk) && sk_poll_ll(sk, flags & MSG_DONTWAIT)) + continue; + /* User doesn't want to wait */ error = -EAGAIN; if (!timeo) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c7338ec79cc..2955b25aee6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -109,6 +109,7 @@ #include #include #include +#include #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -1709,7 +1710,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = udp_queue_rcv_skb(sk, skb); + int ret; + + sk_mark_ll(sk, skb); + ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b5808539cd5..f77e34c5a0e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -841,7 +842,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = udpv6_queue_rcv_skb(sk, skb); + int ret; + + sk_mark_ll(sk, skb); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but -- cgit v1.2.3 From d30e383bb856f614ddb5bbbb5a7d3f86240e41ec Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 10 Jun 2013 11:40:10 +0300 Subject: tcp: add low latency socket poll support. Adds low latency socket poll support for TCP. In tcp_v[46]_rcv() add a call to sk_mark_ll() to copy the napi_id from the skb to the sk. In tcp_recvmsg(), when there is no data in the socket we busy-poll. This is a good example of how to add busy-poll support to more protocols. 
Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Eliezer Tamir Acked-by: Eric Dumazet Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 +++++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/tcp_ipv6.c | 2 ++ 3 files changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bc4246940f6..46ed9afd1f5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include #include +#include int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -1553,6 +1554,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sk_buff *skb; u32 urg_hole = 0; + if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue) + && (sk->sk_state == TCP_ESTABLISHED)) + sk_poll_ll(sk, nonblock); + lock_sock(sk); err = -ENOTCONN; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 289039b4d8d..1063bb83e34 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -1993,6 +1994,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_ll(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf3..5cffa5c3e6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -63,6 +63,7 @@ #include #include #include +#include #include @@ -1498,6 +1499,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_ll(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); -- cgit v1.2.3 From 30f3a40f9a2a2869a560a9cb9ef488d10c803e14 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 5 Jun 2013 20:14:10 +0800 Subject: net: remove last caller of skb_tail_offset() and itself Similar to the following commits: commit 00f97da17a0c8d656d0c9 (netpoll: fix position of network header) commit 525cebedb32a87fa48584 (pktgen: Fix position of ip and udp header) using skb_tail_offset() seems not correct since the offset 
is based on head pointer. With the last caller removed, skb_tail_offset() can be killed finally. Cc: Thomas Graf Cc: Daniel Borkmann Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index df97f0ac1a1..132a0966470 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -945,7 +945,6 @@ static int ipmr_cache_report(struct mr_table *mrt, struct igmpmsg *msg; struct sock *mroute_sk; int ret; - unsigned long tail_offset; #ifdef CONFIG_IP_PIMSM if (assert == IGMPMSG_WHOLEPKT) @@ -981,12 +980,7 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Copy the IP header */ - tail_offset = skb_tail_offset(skb); - if (tail_offset > 0xffff) { - kfree_skb(skb); - return -EINVAL; - } - skb_set_network_header(skb, tail_offset); + skb_set_network_header(skb, skb->len); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, ihl); ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ -- cgit v1.2.3 From 9ba18891f75535eca3ef53138b48970eb60f5255 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 5 Jun 2013 10:08:00 -0400 Subject: bridge: Add flag to control mac learning. Allow user to control whether mac learning is enabled on the port. By default, mac learning is enabled. Disabling mac learning will cause new dynamic FDB entries to not be created for a particular port. Signed-off-by: Vlad Yasevich Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_if.c | 2 +- net/bridge/br_input.c | 6 ++++-- net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 1 + net/bridge/br_sysfs_if.c | 2 ++ 5 files changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4cdba60926f..2c08911df57 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = 0; + p->flags = BR_LEARNING; br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 828e2bcc1f5..7e993667d4b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -75,7 +75,8 @@ int br_handle_frame_finish(struct sk_buff *skb) /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -142,7 +143,8 @@ static int br_handle_local_finish(struct sk_buff *skb) u16 vid = 0; br_vlan_get_tag(skb, &vid); - br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8e3abf56479..ce902bf8a61 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -30,6 +30,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + 0; } @@ -56,7 +57,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, 
IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || - nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING))) return -EMSGSIZE; return 0; @@ -281,6 +283,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -328,6 +331,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1b0ac95a5c3..04d7f43508f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -158,6 +158,7 @@ struct net_bridge_port #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 #define BR_ADMIN_COST 0x00000010 +#define BR_LEARNING 0x00000020 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index a1ef1b6e14d..707f3628e9c 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -158,6 +158,7 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); 
+BRPORT_ATTR_FLAG(learning, BR_LEARNING); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -195,6 +196,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_hairpin_mode, &brport_attr_bpdu_guard, &brport_attr_root_block, + &brport_attr_learning, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, -- cgit v1.2.3 From 867a59436fc35593ae0e0efcd56cc6d2f8506586 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 5 Jun 2013 10:08:01 -0400 Subject: bridge: Add a flag to control unicast packet flood. Add a flag to control flood of unicast traffic. By default, flood is on and the bridge will flood unicast traffic if it doesn't know the destination. When the flag is turned off, unicast traffic without an FDB will not be forwarded to the specified port. Signed-off-by: Vlad Yasevich Reviewed-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/bridge/br_device.c | 8 ++++---- net/bridge/br_forward.c | 14 +++++++++----- net/bridge/br_if.c | 2 +- net/bridge/br_input.c | 9 ++++++--- net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 6 ++++-- net/bridge/br_sysfs_if.c | 2 ++ 7 files changed, 31 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 75f3239130f..2ef66781fed 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -58,10 +58,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); if (is_broadcast_ether_addr(dest)) - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); goto out; } if (br_multicast_rcv(br, NULL, skb)) { @@ -73,11 +73,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (mdst || 
BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) br_multicast_deliver(mdst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, true); out: rcu_read_unlock(); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 092b20e4ee4..4b81b147178 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -174,7 +174,8 @@ out: static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb0, void (*__packet_hook)(const struct net_bridge_port *p, - struct sk_buff *skb)) + struct sk_buff *skb), + bool unicast) { struct net_bridge_port *p; struct net_bridge_port *prev; @@ -182,6 +183,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { + /* Do not flood unicast traffic to ports that turn it off */ + if (unicast && !(p->flags & BR_FLOOD)) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; @@ -203,16 +207,16 @@ out: /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast) { - br_flood(br, skb, NULL, __br_deliver); + br_flood(br, skb, NULL, __br_deliver, unicast); } /* called under bridge lock */ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2) + struct sk_buff *skb2, bool unicast) { - br_flood(br, skb, skb2, __br_forward); + br_flood(br, skb, skb2, __br_forward, unicast); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2c08911df57..5623be6b9ec 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; 
p->port_no = index; - p->flags = BR_LEARNING; + p->flags = BR_LEARNING | BR_FLOOD; br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7e993667d4b..1b8b8b824cd 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -65,6 +65,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + bool unicast = true; u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) @@ -95,9 +96,10 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) + if (is_broadcast_ether_addr(dest)) { skb2 = skb; - else if (is_multicast_ether_addr(dest)) { + unicast = false; + } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || @@ -110,6 +112,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } else skb2 = skb; + unicast = false; br->dev->stats.multicast++; } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) { @@ -123,7 +126,7 @@ int br_handle_frame_finish(struct sk_buff *skb) dst->used = jiffies; br_forward(dst->dst, skb, skb2); } else - br_flood_forward(br, skb, skb2); + br_flood_forward(br, skb, skb2, unicast); } if (skb2) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ce902bf8a61..1fc30abd3a5 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -31,6 +31,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + 0; } @@ -58,7 +59,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || 
nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || - nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING))) + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD))) return -EMSGSIZE; return 0; @@ -284,6 +286,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -332,6 +335,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 04d7f43508f..3be89b3ce17 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -159,6 +159,7 @@ struct net_bridge_port #define BR_MULTICAST_FAST_LEAVE 0x00000008 #define BR_ADMIN_COST 0x00000010 #define BR_LEARNING 0x00000020 +#define BR_FLOOD 0x00000040 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; @@ -414,9 +415,10 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, + bool unicast); extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct 
sk_buff *skb2); + struct sk_buff *skb2, bool unicast); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 707f3628e9c..2a2cdb756d5 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -159,6 +159,7 @@ BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); BRPORT_ATTR_FLAG(learning, BR_LEARNING); +BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -197,6 +198,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_bpdu_guard, &brport_attr_root_block, &brport_attr_learning, + &brport_attr_unicast_flood, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, -- cgit v1.2.3 From da12c90e099789a63073fc82a19542ce54d4efb9 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 6 Jun 2013 14:49:11 +0800 Subject: netlink: Add compare function for netlink_table As we know, netlink sockets are a private resource of a net namespace; they can communicate with each other only when they are in the same net namespace. This works well until we try to add namespace support for other subsystems which use netlink. Unlike ipv4 and the route table, it is not suitable to make these subsystems belong to a net namespace; subsystems such as audit and crypto are better suited to a user namespace. So we must have the ability to let netlink sockets in the same user namespace communicate with each other. This patch adds a new function pointer "compare" for netlink_table, so we can decide whether netlink sockets can communicate with each other through this netlink_table self-defined compare function. The behavior isn't changed if we don't provide the compare function for netlink_table. Signed-off-by: Gao feng Acked-by: Serge E.
Hallyn Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 33 +++++++++++++++++++++++++-------- net/netlink/af_netlink.h | 1 + 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 68c16737439..9b6b115e008 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -858,16 +858,23 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } +static bool netlink_compare(struct net *net, struct sock *sk) +{ + return net_eq(sock_net(sk), net); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_portid_hash *hash = &nl_table[protocol].hash; + struct netlink_table *table = &nl_table[protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; struct sock *sk; read_lock(&nl_table_lock); head = nl_portid_hashfn(hash, portid); sk_for_each(sk, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { + if (table->compare(net, sk) && + (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -980,7 +987,8 @@ netlink_update_listeners(struct sock *sk) static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct netlink_table *table = &nl_table[sk->sk_protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -990,7 +998,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) + if (table->compare(net, osk) && + (nlk_sk(osk)->portid == portid)) break; len++; } @@ -1165,6 +1174,7 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; + nl_table[sk->sk_protocol].compare = NULL; 
nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -1187,7 +1197,8 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct netlink_table *table = &nl_table[sk->sk_protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; struct sock *osk; s32 portid = task_tgid_vnr(current); @@ -1199,7 +1210,7 @@ retry: netlink_table_grab(); head = nl_portid_hashfn(hash, portid); sk_for_each(osk, head) { - if (!net_eq(sock_net(osk), net)) + if (!table->compare(net, osk)) continue; if (nlk_sk(osk)->portid == portid) { /* Bind collision, search negative portid values. */ @@ -2315,9 +2326,12 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, rcu_assign_pointer(nl_table[unit].listeners, listeners); nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; + nl_table[unit].compare = netlink_compare; if (cfg) { nl_table[unit].bind = cfg->bind; nl_table[unit].flags = cfg->flags; + if (cfg->compare) + nl_table[unit].compare = cfg->compare; } nl_table[unit].registered = 1; } else { @@ -2740,6 +2754,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *s; struct nl_seq_iter *iter; + struct net *net; int i, j; ++*pos; @@ -2747,11 +2762,12 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); + net = seq_file_net(seq); iter = seq->private; s = v; do { s = sk_next(s); - } while (s && sock_net(s) != seq_file_net(seq)); + } while (s && !nl_table[s->sk_protocol].compare(net, s)); if (s) return s; @@ -2763,7 +2779,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); - while (s && sock_net(s) != seq_file_net(seq)) + + while (s && !nl_table[s->sk_protocol].compare(net, 
s)) s = sk_next(s); if (s) { iter->link = i; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed8522265f4..eaa88d187cd 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -73,6 +73,7 @@ struct netlink_table { struct mutex *cb_mutex; struct module *module; void (*bind)(int group); + bool (*compare)(struct net *net, struct sock *sock); int registered; }; -- cgit v1.2.3 From b41abb42bf62a85a32c41dab873220598a6ee266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?= Date: Thu, 6 Jun 2013 21:27:21 +0800 Subject: net: pass correct parameter to skb_headers_offset_update() Since commit 1a37e412a022(net: Use 16bits for *_headers fields of struct skbuff), skb->*_header are relative to skb->head, so copy_skb_header() should not call skb_headers_offset_update() now, and we should pass correct parameter to skb_headers_offset_update() in pskb_expand_head() and skb_copy_expand(). Signed-off-by: Weiping Pan Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4a4181e16c1..edf37578e21 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -909,18 +909,8 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off) static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { -#ifndef NET_SKBUFF_DATA_USES_OFFSET - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; -#endif - __copy_skb_header(new, old); -#ifndef NET_SKBUFF_DATA_USES_OFFSET - skb_headers_offset_update(new, offset); -#endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; @@ -1112,7 +1102,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->end = skb->head + size; #endif skb->tail += off; - skb_headers_offset_update(skb, off); + skb_headers_offset_update(skb, nhead); /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1207,9 +1197,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, off = newheadroom - oldheadroom; if (n->ip_summed == CHECKSUM_PARTIAL) n->csum_start += off; -#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb_headers_offset_update(n, off); -#endif return n; } -- cgit v1.2.3 From 45203a3b380cee28f570475c0d28c169f908c209 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2013 08:43:22 -0700 Subject: net_sched: add 64bit rate estimators struct gnet_stats_rate_est contains u32 fields, so the bytes per second field can wrap at 34360Mbit. Add a new gnet_stats_rate_est64 structure to get 64bit bps/pps fields, and switch the kernel to use this structure natively. 
This structure is dumped to user space as a new attribute : TCA_STATS_RATE_EST64 Old tc command will now display the capped bps (to 34360Mbit), instead of wrapped values, and updated tc command will display correct information. Old tc command output, after patch : eric:~# tc -s -d qd sh dev lo qdisc pfifo 8001: root refcnt 2 limit 1000p Sent 80868245400 bytes 1978837 pkt (dropped 0, overlimits 0 requeues 0) rate 34360Mbit 189696pps backlog 0b 0p requeues 0 This patch carefully reorganizes "struct Qdisc" layout to get optimal performance on SMP. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 12 ++++++------ net/core/gen_stats.c | 22 +++++++++++++++++----- net/netfilter/xt_rateest.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_qfq.c | 2 +- 8 files changed, 29 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index d9d198aa9fe..6b5b6e7013c 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -82,7 +82,7 @@ struct gen_estimator { struct list_head list; struct gnet_stats_basic_packed *bstats; - struct gnet_stats_rate_est *rate_est; + struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; u64 last_bytes; @@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est) static struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { struct rb_node *p = est_root.rb_node; @@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -258,7 +258,7 @@ 
EXPORT_SYMBOL(gen_new_estimator); * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est) + struct gnet_stats_rate_est64 *rate_est) { struct gen_estimator *e; @@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * Returns 0 on success or a negative error code. */ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { gen_kill_estimator(bstats, rate_est); @@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * Returns true if estimator is active, and false if not. */ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { bool res; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index ddedf211e58..9d3d9e78397 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); int gnet_stats_copy_rate_est(struct gnet_dump *d, const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est *r) + struct gnet_stats_rate_est64 *r) { + struct gnet_stats_rate_est est; + int res; + if (b && !gen_estimator_active(b, r)) return 0; + est.bps = min_t(u64, UINT_MAX, r->bps); + /* we have some time before reaching 2^32 packets per second */ + est.pps = r->pps; + if (d->compat_tc_stats) { - d->tc_stats.bps = r->bps; - d->tc_stats.pps = r->pps; + d->tc_stats.bps = est.bps; + d->tc_stats.pps = est.pps; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r)); + if (d->tail) { + res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est)); + if (res < 0 || est.bps == r->bps) + return res; + /* emit 64bit stats only if needed */ + return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r)); + } return 0; } diff 
--git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index ed0db15ab00..7720b036d76 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -18,7 +18,7 @@ static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; - struct gnet_stats_rate_est *r; + struct gnet_stats_rate_est64 *r; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 1bc210ffcba..71a56886255 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -130,7 +130,7 @@ struct cbq_class { psched_time_t penalized; struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 759b308d1a8..8302717ea30 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -25,7 +25,7 @@ struct drr_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct list_head alist; struct Qdisc *qdisc; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9facea03fae..c4075610502 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -114,7 +114,7 @@ struct hfsc_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adaedd79389..162fb800754 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -78,7 +78,7 @@ struct htb_class { /* general class parameters */ struct gnet_stats_basic_packed bstats; struct 
gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct tc_htb_xstats xstats; /* our special stats */ int refcnt; /* usage count of this class */ diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d51852bba01..7c195d972bf 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -138,7 +138,7 @@ struct qfq_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct Qdisc *qdisc; struct list_head alist; /* Link for active-classes list. */ struct qfq_aggregate *agg; /* Parent aggregate. */ -- cgit v1.2.3 From ecccd072b07e7fd09c54d0f86f9374e2645cde97 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 10 Jun 2013 13:17:21 -0700 Subject: mac80211: fix mesh deadlock The patch "cfg80211/mac80211: use cfg80211 wdev mutex in mac80211" introduced several deadlocks by converting the ifmsh->mtx to wdev->mtx. Solve these by: 1. drop the cancel_work_sync() in ieee80211_stop_mesh(). Instead make the mesh work conditional on whether the mesh is running or not. 2. lock the mesh work with sdata_lock() to protect beacon updates and prevent races with wdev->mesh_id_len or cfg80211. 
Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 29 +++++++++++++++++------------ net/mac80211/mesh_plink.c | 7 +------ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 73a597bad6e..d5faf91632c 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -579,9 +579,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); - sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); - sdata_unlock(sdata); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + @@ -788,12 +786,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - sdata_lock(sdata); bcn = rcu_dereference_protected(ifmsh->beacon, lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); - sdata_unlock(sdata); /* flush STAs and mpaths on this iface */ sta_info_flush(sdata); @@ -806,14 +802,6 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); - /* - * If the timer fired while we waited for it, it will have - * requeued the work. Now the work will be running again - * but will not rearm the timer again because it checks - * whether the interface is running, which, at this point, - * it no longer is. 
- */ - cancel_work_sync(&sdata->work); local->fif_other_bss--; atomic_dec(&local->iff_allmultis); @@ -954,6 +942,12 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u16 stype; + sdata_lock(sdata); + + /* mesh already went down */ + if (!sdata->wdev.mesh_id_len) + goto out; + rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; @@ -971,12 +965,20 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } +out: + sdata_unlock(sdata); } void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + sdata_lock(sdata); + + /* mesh already went down */ + if (!sdata->wdev.mesh_id_len) + goto out; + if (ifmsh->preq_queue_len && time_after(jiffies, ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval))) @@ -996,6 +998,9 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags)) mesh_sync_adjust_tbtt(sdata); + +out: + sdata_unlock(sdata); } void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 6c4da99bc4f..09bebed9941 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -517,9 +517,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, ieee80211_mps_frame_release(sta, elems); out: rcu_read_unlock(); - sdata_lock(sdata); ieee80211_mbss_info_change_notify(sdata, changed); - sdata_unlock(sdata); } static void mesh_plink_timer(unsigned long data) @@ -1070,9 +1068,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - if (changed) { - sdata_lock(sdata); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); - 
sdata_unlock(sdata); - } } -- cgit v1.2.3 From 8e7c053853b7d299e8a2b8733659b0df8eee51f7 Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Mon, 3 Jun 2013 09:53:39 -0700 Subject: {nl,cfg}80211: make peer link expiration time configurable If a STA has a peer that it hasn't seen any tx activity from for a certain length of time, the peer link is expired. This means the inactive STA is removed from the list of peers and that STA is not considered a peer again unless it re-peers. Previously, this inactivity time was always 30 minutes. Now, add it to the mesh configuration and allow it to be configured. Retain 30 minutes as a default value. Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- net/wireless/mesh.c | 2 ++ net/wireless/nl80211.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 5dfb289ab76..0daaf72e1b8 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -18,6 +18,7 @@ #define MESH_PATH_TO_ROOT_TIMEOUT 6000 #define MESH_ROOT_INTERVAL 5000 #define MESH_ROOT_CONFIRMATION_INTERVAL 2000 +#define MESH_DEFAULT_PLINK_TIMEOUT 1800 /* timeout in seconds */ /* * Minimum interval between two consecutive PREQs originated by the same @@ -75,6 +76,7 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, .power_mode = NL80211_MESH_POWER_ACTIVE, .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, + .plink_timeout = MESH_DEFAULT_PLINK_TIMEOUT, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 88e820b7367..8aa83c04d4e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4575,7 +4575,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, cur_params.power_mode) || nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, - cur_params.dot11MeshAwakeWindowDuration)) + 
cur_params.dot11MeshAwakeWindowDuration) || + nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT, + cur_params.plink_timeout)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -4616,6 +4618,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, + [NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 }, }; static const struct nla_policy @@ -4753,6 +4756,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff, + mask, NL80211_MESHCONF_PLINK_TIMEOUT, + nla_get_u32); if (mask_out) *mask_out = mask; -- cgit v1.2.3 From 66de671374f003467b5ef7c65ecbe1930480c8c9 Mon Sep 17 00:00:00 2001 From: Colleen Twitty Date: Mon, 3 Jun 2013 09:53:40 -0700 Subject: mac80211: expire mesh peers based on mesh configuration The time it takes to see the peer link expire may differ by a minute since sta_expire() is run once a minute as a mesh housekeeping task. 
Signed-off-by: Colleen Twitty Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ net/mac80211/mesh.c | 2 +- net/mac80211/mesh.h | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 30622101d3b..344a5796807 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1871,6 +1871,8 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) conf->dot11MeshAwakeWindowDuration = nconf->dot11MeshAwakeWindowDuration; + if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask)) + conf->plink_timeout = nconf->plink_timeout; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d5faf91632c..4ee527f7867 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -575,7 +575,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 changed; - ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); + ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 8b4d9a3e9ee..01a28bca6e9 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -188,7 +188,6 @@ struct mesh_rmc { u32 idx_mask; }; -#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) #define MESH_PATH_EXPIRE (600 * HZ) -- cgit v1.2.3 From ffb3cf3000aa12facdccbdfcb10bfebda7199209 Mon Sep 17 00:00:00 2001 From: Ashok Nagarajan Date: Mon, 3 Jun 2013 10:33:36 -0700 Subject: {nl,mac,cfg}80211: Allow user to configure basic rates for mesh Currently mesh uses mandatory rates as the default basic rates. Allow basic rates to be configured during mesh join. 
Basic rates are applied only if channel is also provided with mesh join command. Signed-off-by: Ashok Nagarajan [some whitespace fixes, refuse basic rates w/o channel] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 1 + net/mac80211/mesh.c | 4 ---- net/wireless/mesh.c | 10 ++++++++++ net/wireless/nl80211.c | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 344a5796807..cd6f35f6e71 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1759,6 +1759,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, /* mcast rate setting in Mesh Node */ memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); + sdata->vif.bss_conf.basic_rates = setup->basic_rates; sdata->vif.bss_conf.beacon_int = setup->beacon_interval; sdata->vif.bss_conf.dtim_period = setup->dtim_period; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 4ee527f7867..6c33af482df 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -738,9 +738,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_supported_band *sband = - sdata->local->hw.wiphy->bands[band]; local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -758,7 +755,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; sdata->vif.bss_conf.enable_beacon = true; - sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sband); changed |= ieee80211_mps_local_status_update(sdata); diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0daaf72e1b8..30c49202ee4 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -162,6 +162,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, 
setup->chandef.center_freq1 = setup->chandef.chan->center_freq; } + /* + * check if basic rates are available otherwise use mandatory rates as + * basic rates + */ + if (!setup->basic_rates) { + struct ieee80211_supported_band *sband = + rdev->wiphy.bands[setup->chandef.chan->band]; + setup->basic_rates = ieee80211_mandatory_rates(sband); + } + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) return -EINVAL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8aa83c04d4e..687cb649759 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7487,6 +7487,23 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.chandef.chan = NULL; } + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + u8 *rates = nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + int n_rates = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + struct ieee80211_supported_band *sband; + + if (!setup.chandef.chan) + return -EINVAL; + + sband = rdev->wiphy.bands[setup.chandef.chan->band]; + + err = ieee80211_get_ratemask(sband, rates, n_rates, + &setup.basic_rates); + if (err) + return err; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } -- cgit v1.2.3 From 3d124ea27ae2fc895f81725f0b4c7f3d9c733df4 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 27 May 2013 18:24:02 +0300 Subject: cfg80211: fix VHT TDLS peer AID verification I (Johannes) accidentally applied the first version of the patch ("Allow TDLS peer AID to be configured for VHT"). Now apply just the changes between v1 and v2 to get the AID verification and prefer the new attribute over the old one. 
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 687cb649759..7183410fcd4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3972,10 +3972,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - if (info->attrs[NL80211_ATTR_STA_AID]) - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); - else + if (info->attrs[NL80211_ATTR_PEER_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); + else + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; @@ -4027,7 +4027,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; /* TDLS peers cannot be added */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) || + info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); @@ -4053,7 +4054,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) return -EINVAL; /* TDLS peers cannot be added */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) || + info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; break; case NL80211_IFTYPE_STATION: -- cgit v1.2.3 From f7aeb6fb1a3d6b09623b169518314bc7869fffec Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 11 Jun 2013 14:20:00 +0200 Subject: mac80211: make mgmt_tx accept a NULL channel cfg80211 passes a NULL channel to 
mgmt_tx if the frame has to be sent on the one currently in use by the device. Make the implementation of mgmt_tx correctly handle this case. Fail if offchan is required. Signed-off-by: Antonio Quartulli [fix RCU locking] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index cd6f35f6e71..64cf294c2b9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2841,6 +2841,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, return -EOPNOTSUPP; } + /* configurations requiring offchan cannot work if no channel has been + * specified + */ + if (need_offchan && !chan) + return -EINVAL; + mutex_lock(&local->mtx); /* Check if the operating channel is the requested channel */ @@ -2850,10 +2856,15 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (chanctx_conf) - need_offchan = chan != chanctx_conf->def.chan; - else + if (chanctx_conf) { + need_offchan = chan && (chan != chanctx_conf->def.chan); + } else if (!chan) { + ret = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } else { need_offchan = true; + } rcu_read_unlock(); } -- cgit v1.2.3 From ea141b75ae29636b5c9e9d2e2e77b3dd1ab4c934 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 11 Jun 2013 14:20:03 +0200 Subject: nl80211: allow sending CMD_FRAME without specifying any frequency Users may want to send a frame on the current channel without specifying it. This is particularly useful for the correct implementation of the IBSS/RSN support in wpa_supplicant which requires to receive and send AUTH frames. Make mgmt_tx pass a NULL channel to the driver if none has been specified by the user. 
Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7183410fcd4..398ce2c5968 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7147,6 +7147,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; switch (wdev->iftype) { + case NL80211_IFTYPE_P2P_DEVICE: + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) + return -EINVAL; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_CLIENT: @@ -7154,7 +7157,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -7182,9 +7184,18 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); - err = nl80211_parse_chandef(rdev, info, &chandef); - if (err) - return err; + /* get the channel if any has been specified, otherwise pass NULL to + * the driver. The latter will use the current one + */ + chandef.chan = NULL; + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + } + + if (!chandef.chan && offchan) + return -EINVAL; if (!dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); -- cgit v1.2.3 From 940d0ac9dbe3fb9d4806e96f006286c2e476deed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Jun 2013 16:51:03 +0200 Subject: cfg80211: fix rtnl leak in wiphy dump error cases In two wiphy dump error cases, most often when the dump allocation must be increased, the RTNL is leaked. This quickly results in a complete system lockup. Release the RTNL correctly. 
Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 398ce2c5968..e4028197b75 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1541,8 +1541,10 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) + if (!netdev) { + rtnl_unlock(); return -ENODEV; + } if (netdev->ieee80211_ptr) { dev = wiphy_to_dev( netdev->ieee80211_ptr->wiphy); @@ -1586,6 +1588,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) !skb->len && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; + rtnl_unlock(); return 1; } idx--; -- cgit v1.2.3 From 130d3d68b52097c7ae081109f700b02776adcb9c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2013 13:56:19 -0700 Subject: net_sched: psched_ratecfg_precompute() improvements Before allowing 64bits bytes rates, refactor psched_ratecfg_precompute() to get better comments and increased accuracy. rate_bps field is renamed to rate_bytes_ps, as we only have to worry about bytes per second. Signed-off-by: Eric Dumazet Cc: Ben Greear Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 20224086cc2..4626cef4b76 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -901,37 +901,33 @@ void dev_shutdown(struct net_device *dev) void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf) { - u64 factor; - u64 mult; - int shift; - memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; - r->rate_bps = (u64)conf->rate << 3; + r->rate_bytes_ps = conf->rate; r->mult = 1; /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. + * The deal here is to replace a divide by a reciprocal one + * in fast path (a reciprocal divide is a multiply and a shift) + * + * Normal formula would be : + * time_in_ns = (NSEC_PER_SEC * len) / rate_bps + * + * We compute mult/shift to use instead : + * time_in_ns = (len * mult) >> shift; + * + * We try to get the highest possible mult value for accuracy, + * but have to make sure no overflows will ever happen. */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. 
- */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) + if (r->rate_bytes_ps > 0) { + u64 factor = NSEC_PER_SEC; + + for (;;) { + r->mult = div64_u64(factor, r->rate_bytes_ps); + if (r->mult & (1U << 31) || factor & (1ULL << 63)) break; + factor <<= 1; + r->shift++; } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); } } EXPORT_SYMBOL(psched_ratecfg_precompute); -- cgit v1.2.3 From 64153ce0a7b61b2a5cacb01805cbf670142339e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2013 14:53:16 -0700 Subject: net_sched: htb: do not setup default rate estimators With a thousand htb classes, est_timer() spends ~5 million cpu cycles and throws out cpu cache, because each htb class has a default rate estimator (est 4sec 16sec). Most users do not use default rate estimators, so switch htb to not setup ones. Add a module parameter (htb_rate_est) so that users relying on this default rate estimator can revert the behavior. echo 1 >/sys/module/sch_htb/parameters/htb_rate_est Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/sched/sch_htb.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 162fb800754..1a3655a606c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -65,6 +65,10 @@ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis f module_param (htb_hysteresis, int, 0640); MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate"); +static int htb_rate_est = 0; /* htb classes have a default rate estimator */ +module_param(htb_rate_est, int, 0640); +MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes"); + /* used internaly to keep status of single class */ enum htb_cmode { HTB_CANT_SEND, /* class can't send and can't borrow */ @@ -1366,12 +1370,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - err = gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE] ? : &est.nla); - if (err) { - kfree(cl); - goto failure; + if (htb_rate_est || tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE] ? : &est.nla); + if (err) { + kfree(cl); + goto failure; + } } cl->refcnt = 1; -- cgit v1.2.3 From e9897071350bd9d94a56b5b6f79c85b1a98fc7e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Jun 2013 08:48:57 -0700 Subject: igmp: hash a hash table to speedup ip_check_mc_rcu() After IP route cache removal, multicast applications using a lot of multicast addresses hit a O(N) behavior in ip_check_mc_rcu() Add a per in_device hash table to get faster lookup. This hash table is created only if the number of items in mc_list is above 4. Reported-by: Shawn Bohrer Signed-off-by: Eric Dumazet Tested-by: Shawn Bohrer Reviewed-by: Cong Wang Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 1 + net/ipv4/igmp.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 71 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b047e2d8a61..3469506c106 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev) WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); + kfree(rcu_dereference_protected(idev->mc_hash, 1)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); #endif diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 450f625361e..f72011df9c5 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im) * Multicast list managers */ +static u32 ip_mc_hash(const struct ip_mc_list *im) +{ + return hash_32((u32)im->multiaddr, MC_HASH_SZ_LOG); +} + +static void ip_mc_hash_add(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash; + u32 hash; + + mc_hash = rtnl_dereference(in_dev->mc_hash); + if (mc_hash) { + hash = ip_mc_hash(im); + im->next_hash = rtnl_dereference(mc_hash[hash]); + rcu_assign_pointer(mc_hash[hash], im); + return; + } + + /* do not use a hash table for small number of items */ + if (in_dev->mc_count < 4) + return; + + mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, + GFP_KERNEL); + if (!mc_hash) + return; + + for_each_pmc_rtnl(in_dev, im) { + hash = ip_mc_hash(im); + im->next_hash = rtnl_dereference(mc_hash[hash]); + RCU_INIT_POINTER(mc_hash[hash], im); + } + + rcu_assign_pointer(in_dev->mc_hash, mc_hash); +} + +static void ip_mc_hash_remove(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash); + struct ip_mc_list *aux; + + if (!mc_hash) + return; + mc_hash += ip_mc_hash(im); + while ((aux = rtnl_dereference(*mc_hash)) != im) + mc_hash = 
&aux->next_hash; + *mc_hash = im->next_hash; +} + /* * A socket has joined a multicast group on device dev. @@ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) in_dev->mc_count++; rcu_assign_pointer(in_dev->mc_list, im); + ip_mc_hash_add(in_dev, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { + ip_mc_hash_remove(in_dev, i); *ip = i->next_rcu; in_dev->mc_count--; igmp_group_dropped(i); @@ -2321,12 +2375,25 @@ void ip_mc_drop_socket(struct sock *sk) int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; + struct ip_mc_list __rcu **mc_hash; struct ip_sf_list *psf; int rv = 0; - for_each_pmc_rcu(in_dev, im) { - if (im->multiaddr == mc_addr) - break; + mc_hash = rcu_dereference(in_dev->mc_hash); + if (mc_hash) { + u32 hash = hash_32((u32)mc_addr, MC_HASH_SZ_LOG); + + for (im = rcu_dereference(mc_hash[hash]); + im != NULL; + im = rcu_dereference(im->next_hash)) { + if (im->multiaddr == mc_addr) + break; + } + } else { + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == mc_addr) + break; + } } if (im && proto == IPPROTO_IGMP) { rv = 1; -- cgit v1.2.3 From 946d3bd7231be3b6202759ea0bea59989ae28c4a Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Fri, 7 Jun 2013 12:34:43 -0500 Subject: igmp: remove unnecessary in_device member zeroing ip_mc_init_dev() is passed a freshly kzalloc'd in_device so it is unnecessary to explicitly zero out the members. Signed-off-by: Shawn Bohrer Signed-off-by: David S. 
Miller --- net/ipv4/igmp.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f72011df9c5..a09190ddffb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1435,13 +1435,9 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST - in_dev->mr_gq_running = 0; setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, (unsigned long)in_dev); - in_dev->mr_ifc_count = 0; - in_dev->mc_count = 0; setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; -- cgit v1.2.3 From da5bab079f9b7d90ba234965a14914ace55e45e9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 8 Jun 2013 12:56:03 +0200 Subject: net: udp4: move GSO functions to udp_offload Similarly to TCP offloading and UDPv6 offloading, move all related UDPv4 functions to udp_offload.c to make things more explicit. Also, by this, we can make those functions static. Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/ipv4/Makefile | 2 +- net/ipv4/af_inet.c | 9 +---- net/ipv4/udp.c | 75 +------------------------------------ net/ipv4/udp_offload.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 82 deletions(-) create mode 100644 net/ipv4/udp_offload.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4d3e138c564..7fcf8101d85 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -9,7 +9,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ - arp.o icmp.o devinet.o af_inet.o igmp.o \ + udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7b514290efc..5598b06d62d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1566,13 +1566,6 @@ static const struct net_protocol udp_protocol = { .netns_ok = 1, }; -static const struct net_offload udp_offload = { - .callbacks = { - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, - }, -}; - static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, @@ -1672,7 +1665,7 @@ static int __init ipv4_offload_init(void) /* * Add offloads */ - if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + if (udpv4_offload_init() < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (tcpv4_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2955b25aee6..f65bc32c026 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2290,29 +2290,8 @@ void __init udp_init(void) sysctl_udp_wmem_min = SK_MEM_QUANTUM; } -int udp4_ufo_send_check(struct sk_buff *skb) -{ - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - return -EINVAL; - - if 
(likely(!skb->encapsulation)) { - const struct iphdr *iph; - struct udphdr *uh; - - iph = ip_hdr(skb); - uh = udp_hdr(skb); - - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - } - return 0; -} - -static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); int mac_len = skb->mac_len; @@ -2371,53 +2350,3 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, out: return segs; } - -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE | SKB_GSO_MPLS) || - !(type & (SKB_GSO_UDP)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Fragment the skb. IP headers of the fragments are updated in - * inet_gso_segment() - */ - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) - segs = skb_udp_tunnel_segment(skb, features); - else { - int offset; - __wsum csum; - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. 
- */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - segs = skb_segment(skb, features); - } -out: - return segs; -} diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c new file mode 100644 index 00000000000..f35eccaa855 --- /dev/null +++ b/net/ipv4/udp_offload.c @@ -0,0 +1,100 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * UDPv4 GSO support + */ + +#include +#include +#include + +static int udp4_ufo_send_check(struct sk_buff *skb) +{ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + return -EINVAL; + + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; + + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } + + return 0; +} + +static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. 
*/ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_GRE | SKB_GSO_MPLS) || + !(type & (SKB_GSO_UDP)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + /* Fragment the skb. IP headers of the fragments are updated in + * inet_gso_segment() + */ + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } +out: + return segs; +} + +static const struct net_offload udpv4_offload = { + .callbacks = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, + }, +}; + +int __init udpv4_offload_init(void) +{ + return inet_add_offload(&udpv4_offload, IPPROTO_UDP); +} -- cgit v1.2.3 From 7a6e288d2745611bef5b614acf19644283765732 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 8 Jun 2013 14:18:16 +0200 Subject: pktgen: ipv6: numa: consolidate skb allocation to pktgen_alloc_skb We currently allow for numa-node aware skb allocation only within the fill_packet_ipv4() path, but not in fill_packet_ipv6(). Consolidate that code to a common allocation helper to enable numa-node aware skb allocation for ipv6, and use it in both paths. This also makes both functions a bit more readable. Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/core/pktgen.c | 52 +++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 303412d8332..9640972ec50 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2627,6 +2627,29 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->tv_usec = htonl(timestamp.tv_usec); } +static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, + struct pktgen_dev *pkt_dev, + unsigned int extralen) +{ + struct sk_buff *skb = NULL; + unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + + pkt_dev->pkt_overhead; + + if (pkt_dev->flags & F_NODE) { + int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id(); + + skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node); + if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + } + } else { + skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); + } + + return skb; +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2657,32 +2680,13 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; - if (pkt_dev->flags & F_NODE) { - int node; - - if (pkt_dev->node >= 0) - node = pkt_dev->node; - else - node = numa_node_id(); - - skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); - skb->dev = odev; - } - } - else - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); - + skb = pktgen_alloc_skb(odev, pkt_dev, datalen); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } - prefetchw(skb->data); + prefetchw(skb->data); skb_reserve(skb, datalen); /* Reserve for ethernet and IP header */ @@ -2786,15 +2790,13 @@ static struct sk_buff *fill_packet_ipv6(struct 
net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT); + skb = pktgen_alloc_skb(odev, pkt_dev, 16); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } - prefetchw(skb->data); + prefetchw(skb->data); skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ -- cgit v1.2.3 From c70eba74532a9b54583689fead6e2e8f3a86e1c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jun 2013 14:11:16 -0700 Subject: igmp: fix new sparse errors Fix following sparse errors : net/ipv4/igmp.c:1222:25: warning: cast from restricted __be32 net/ipv4/igmp.c:1234:31: warning: incorrect type in assignment (different address spaces) net/ipv4/igmp.c:1234:31: expected struct ip_mc_list [noderef] *next_hash net/ipv4/igmp.c:1234:31: got struct ip_mc_list * net/ipv4/igmp.c:1250:31: warning: incorrect type in assignment (different address spaces) net/ipv4/igmp.c:1250:31: expected struct ip_mc_list [noderef] *next_hash net/ipv4/igmp.c:1250:31: got struct ip_mc_list * net/ipv4/igmp.c:2380:37: warning: cast from restricted __be32 These were added by commit e9897071350bd9 ("igmp: hash a hash table to speedup ip_check_mc_rcu()") Reported-by: kbuild test robot Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/igmp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a09190ddffb..cd71190d296 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1219,7 +1219,7 @@ static void igmp_group_added(struct ip_mc_list *im) static u32 ip_mc_hash(const struct ip_mc_list *im) { - return hash_32((u32)im->multiaddr, MC_HASH_SZ_LOG); + return hash_32((__force u32)im->multiaddr, MC_HASH_SZ_LOG); } static void ip_mc_hash_add(struct in_device *in_dev, @@ -1231,7 +1231,7 @@ static void ip_mc_hash_add(struct in_device *in_dev, mc_hash = rtnl_dereference(in_dev->mc_hash); if (mc_hash) { hash = ip_mc_hash(im); - im->next_hash = rtnl_dereference(mc_hash[hash]); + im->next_hash = mc_hash[hash]; rcu_assign_pointer(mc_hash[hash], im); return; } @@ -1247,7 +1247,7 @@ static void ip_mc_hash_add(struct in_device *in_dev, for_each_pmc_rtnl(in_dev, im) { hash = ip_mc_hash(im); - im->next_hash = rtnl_dereference(mc_hash[hash]); + im->next_hash = mc_hash[hash]; RCU_INIT_POINTER(mc_hash[hash], im); } @@ -2377,7 +2377,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u mc_hash = rcu_dereference(in_dev->mc_hash); if (mc_hash) { - u32 hash = hash_32((u32)mc_addr, MC_HASH_SZ_LOG); + u32 hash = hash_32((__force u32)mc_addr, MC_HASH_SZ_LOG); for (im = rcu_dereference(mc_hash[hash]); im != NULL; -- cgit v1.2.3 From 5b9b6263775d45ccc0c8b27344bfb1a97cf6f725 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jun 2013 14:23:15 -0700 Subject: gro: remove a sparse error Fix following sparse error : net/ipv4/af_inet.c:1410:59: warning: restricted __be16 degrades to integer added in commit db8caf3dbc77599 ("gro: should aggregate frames without DF") Reported-by: kbuild test robot From: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/af_inet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5598b06d62d..b4d0be2b7ce 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1407,7 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | - ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | + (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; -- cgit v1.2.3 From 7c0cadc69ca2ac8893aa162ee80d92a805840909 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jun 2013 14:31:39 -0700 Subject: udp: fix two sparse errors commit ba418fa357a7b3c ("soreuseport: UDP/IPv4 implementation") added following sparse errors : net/ipv4/udp.c:433:60: warning: cast from restricted __be16 net/ipv4/udp.c:433:60: warning: incorrect type in argument 1 (different base types) net/ipv4/udp.c:433:60: expected unsigned short [unsigned] [usertype] val net/ipv4/udp.c:433:60: got restricted __be16 [usertype] sport net/ipv4/udp.c:433:60: warning: cast from restricted __be16 net/ipv4/udp.c:433:60: warning: cast from restricted __be16 net/ipv4/udp.c:514:60: warning: cast from restricted __be16 net/ipv4/udp.c:514:60: warning: incorrect type in argument 1 (different base types) net/ipv4/udp.c:514:60: expected unsigned short [unsigned] [usertype] val net/ipv4/udp.c:514:60: got restricted __be16 [usertype] sport net/ipv4/udp.c:514:60: warning: cast from restricted __be16 net/ipv4/udp.c:514:60: warning: cast from restricted __be16 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f65bc32c026..959502afd8d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -430,7 +430,7 @@ begin: reuseport = sk->sk_reuseport; if (reuseport) { hash = inet_ehashfn(net, daddr, hnum, - saddr, htons(sport)); + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -511,7 +511,7 @@ begin: reuseport = sk->sk_reuseport; if (reuseport) { hash = inet_ehashfn(net, daddr, hnum, - saddr, htons(sport)); + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { -- cgit v1.2.3 From 661eb3811df568161399af0048f1ecb4ac073687 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Jun 2013 22:47:56 +0200 Subject: mac80211: fix TX aggregation TID struct leak Ben reports that kmemleak is saying TX aggregation TID structs are leaked. Given his workload, I suspect that they're leaked because stations are destroyed before their aggregation sessions get a chance to start. Fix this by simply freeing structs that are not used yet. Reported-by: Ben Greear Tested-by: Ben Greear Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index b4297982d34..aaf68d29722 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -149,6 +149,7 @@ static void cleanup_single_sta(struct sta_info *sta) * directly by station destruction. 
*/ for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + kfree(sta->ampdu_mlme.tid_start_tx[i]); tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); if (!tid_tx) continue; -- cgit v1.2.3 From ca15febfe98f7c681ac345fc1d2ee1b8decaa493 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 13 Jun 2013 10:05:38 +0800 Subject: netlink: make compare exist all the time Commit da12c90e099789a63073fc82a19542ce54d4efb9 "netlink: Add compare function for netlink_table" only set compare at the time we create kernel netlink, and reset compare to NULL at the time we finally release netlink socket, but netlink_lookup wants the compare to exist always. So we should set compare after we allocate nl_table, and never reset it. Make compare exist all the time. Reported-by: Fengguang Wu Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9b6b115e008..8978755251f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1174,7 +1174,6 @@ static int netlink_release(struct socket *sock) kfree_rcu(old, rcu); nl_table[sk->sk_protocol].module = NULL; nl_table[sk->sk_protocol].bind = NULL; - nl_table[sk->sk_protocol].compare = NULL; nl_table[sk->sk_protocol].flags = 0; nl_table[sk->sk_protocol].registered = 0; } @@ -2326,7 +2325,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, rcu_assign_pointer(nl_table[unit].listeners, listeners); nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; - nl_table[unit].compare = netlink_compare; if (cfg) { nl_table[unit].bind = cfg->bind; nl_table[unit].flags = cfg->flags; @@ -2973,6 +2971,8 @@ static int __init netlink_proto_init(void) hash->shift = 0; hash->mask = 0; hash->rehash_time = jiffies; + + nl_table[i].compare = netlink_compare; } netlink_add_usersock_entry(); -- cgit v1.2.3 From 2c928e0e8dd6b3661870bfacb53d1c330a1a7411 Mon
Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 13 Jun 2013 16:04:33 +0900 Subject: sctp: Correct byte order of access to skb->{network, transport}_header Corrects a byte order conflict introduced by 158874cac61245b84e939c92c53db7000122b7b0 ("sctp: Correct access to skb->{network, transport}_header"). The values in question are host byte order. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 8ee553b499c..fffc7b62a9a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -153,7 +153,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sctp_association *asoc; struct sctp_transport *transport; struct ipv6_pinfo *np; - __be16 saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); -- cgit v1.2.3 From e562078a19226660299eeaf40a50752672214f11 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 12 Jun 2013 14:08:44 -0700 Subject: mac80211: Ensure tid_start_tx is protected by sta->lock All accesses of the tid_start_tx lock should be protected by sta->lock if there is any chance that another thread could still be accessing the sta object. Signed-off-by: Ben Greear Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 4 +++- net/mac80211/sta_info.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 75dff338f58..f83534f6a2e 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -281,13 +281,14 @@ void ieee80211_ba_session_work(struct work_struct *work) sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; if (tid_tx) { /* * Assign it over to the normal tid_tx array * where it "goes live". */ - spin_lock_bh(&sta->lock); sta->ampdu_mlme.tid_start_tx[tid] = NULL; /* could there be a race?
*/ @@ -300,6 +301,7 @@ void ieee80211_ba_session_work(struct work_struct *work) ieee80211_tx_ba_session_handle_start(sta, tid); continue; } + spin_unlock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index bd12fc54266..4208dbd5861 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -203,6 +203,7 @@ struct tid_ampdu_rx { * driver requested to close until the work for it runs * @mtx: mutex to protect all TX data (except non-NULL assignments * to tid_tx[idx], which are protected by the sta spinlock) + * tid_start_tx is also protected by sta->lock. */ struct sta_ampdu_mlme { struct mutex mtx; -- cgit v1.2.3 From a06a2d378dbf099c874ad58de07a8e54ffdc94d3 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 12 Jun 2013 21:04:16 +0800 Subject: net: ping_check_bind_addr() etc. can be static net/ipv4/ping.c:286:5: sparse: symbol 'ping_check_bind_addr' was not declared. Should it be static? net/ipv4/ping.c:355:6: sparse: symbol 'ping_set_saddr' was not declared. Should it be static? net/ipv4/ping.c:370:6: sparse: symbol 'ping_clear_saddr' was not declared. Should it be static? net/ipv6/ping.c:60:5: sparse: symbol 'dummy_ipv6_recv_error' was not declared. Should it be static? net/ipv6/ping.c:64:5: sparse: symbol 'dummy_ip6_datagram_recv_ctl' was not declared. Should it be static? net/ipv6/ping.c:69:5: sparse: symbol 'dummy_icmpv6_err_convert' was not declared. Should it be static? net/ipv6/ping.c:73:6: sparse: symbol 'dummy_ipv6_icmp_error' was not declared. Should it be static? net/ipv6/ping.c:75:5: sparse: symbol 'dummy_ipv6_chk_addr' was not declared. Should it be static? net/ipv6/ping.c:201:5: sparse: symbol 'ping_v6_seq_show' was not declared. Should it be static? Signed-off-by: Fengguang Wu Signed-off-by: David S. 
Miller --- net/ipv4/ping.c | 8 ++++---- net/ipv6/ping.c | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 1f1b2dd9027..746427c9e71 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -283,8 +283,8 @@ void ping_close(struct sock *sk, long timeout) EXPORT_SYMBOL_GPL(ping_close); /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ -int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, - struct sockaddr *uaddr, int addr_len) { +static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { struct net *net = sock_net(sk); if (sk->sk_family == AF_INET) { struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; @@ -352,7 +352,7 @@ int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return 0; } -void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) { if (saddr->sa_family == AF_INET) { struct inet_sock *isk = inet_sk(sk); @@ -367,7 +367,7 @@ void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) } } -void ping_clear_saddr(struct sock *sk, int dif) +static void ping_clear_saddr(struct sock *sk, int dif) { sk->sk_bound_dev_if = dif; if (sk->sk_family == AF_INET) { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a4311038591..2b52046e126 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,23 +57,23 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) { return -EAFNOSUPPORT; } -int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, - struct sk_buff *skb) +static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) { 
return -EAFNOSUPPORT; } -int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) { return -EAFNOSUPPORT; } -void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, - __be16 port, u32 info, u8 *payload) {} -int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict) +static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) { return 0; } @@ -198,7 +198,7 @@ static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) return ping_seq_start(seq, pos, AF_INET6); } -int ping_v6_seq_show(struct seq_file *seq, void *v) +static int ping_v6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); -- cgit v1.2.3 From 194f4a6df2a92c3d0bc65a85facfbc2433b25d06 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 11 Jun 2013 23:09:29 +0200 Subject: net: make all team port device link events urgent Since team functionality relies heavily on userspace daemon, we need to deliver event to userspace via Netlink as quick as possible. So make all team port device link events urgent. Signed-off-by: Flavio Leitner Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/core/link_watch.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8f82a5cc385..9c3a839322b 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -92,6 +92,9 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (dev->ifindex != dev->iflink) return true; + if (dev->priv_flags & IFF_TEAM_PORT) + return true; + return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } -- cgit v1.2.3 From fe2c6338fd2c6f383c4d4164262f35c8f3708e1f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Jun 2013 23:04:25 -0700 Subject: net: Convert uses of typedef ctl_table to struct ctl_table Reduce the uses of this unnecessary typedef. Done via perl script: $ git grep --name-only -w ctl_table net | \ xargs perl -p -i -e '\ sub trim { my ($local) = @_; $local =~ s/(^\s+|\s+$)//g; return $local; } \ s/\b(? Signed-off-by: David S. Miller --- net/ax25/sysctl_net_ax25.c | 2 +- net/bridge/br_netfilter.c | 4 ++-- net/core/neighbour.c | 6 ++--- net/core/sysctl_net_core.c | 8 +++---- net/decnet/dn_dev.c | 6 ++--- net/decnet/sysctl_net_decnet.c | 6 ++--- net/ipv4/devinet.c | 6 ++--- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/route.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 31 +++++++++++++------------- net/ipv6/addrconf.c | 10 ++++----- net/ipv6/icmp.c | 2 +- net/ipv6/route.c | 4 ++-- net/ipv6/sysctl_net_ipv6.c | 4 ++-- net/irda/irsysctl.c | 6 ++--- net/netfilter/ipvs/ip_vs_ctl.c | 8 +++---- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 4 ++-- net/netfilter/nf_log.c | 2 +- net/netrom/sysctl_net_netrom.c | 2 +- net/phonet/sysctl.c | 4 ++-- net/rds/ib_sysctl.c | 2 +- net/rds/iw_sysctl.c | 2 +- net/rds/sysctl.c | 2 +- net/rose/sysctl_net_rose.c | 2 +- net/sctp/sysctl.c | 10 ++++----- net/sunrpc/sysctl.c | 10 ++++----- net/sunrpc/xprtrdma/svc_rdma.c | 8 +++---- net/sunrpc/xprtrdma/transport.c | 4 
++-- net/sunrpc/xprtsock.c | 4 ++-- net/unix/sysctl_net_unix.c | 2 +- 32 files changed, 86 insertions(+), 85 deletions(-) (limited to 'net') diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index d5744b75251..919a5ce4751 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -29,7 +29,7 @@ static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; #endif -static const ctl_table ax25_param_table[] = { +static const struct ctl_table ax25_param_table[] = { { .procname = "ip_default_mode", .maxlen = sizeof(int), diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1ed75bfd8d1..f87736270ea 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -992,7 +992,7 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static -int brnf_sysctl_call_tables(ctl_table * ctl, int write, +int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, void __user * buffer, size_t * lenp, loff_t * ppos) { int ret; @@ -1004,7 +1004,7 @@ int brnf_sysctl_call_tables(ctl_table * ctl, int write, return ret; } -static ctl_table brnf_table[] = { +static struct ctl_table brnf_table[] = { { .procname = "bridge-nf-call-arptables", .data = &brnf_call_arptables, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5c56b217b99..decaa4b9db2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2765,11 +2765,11 @@ EXPORT_SYMBOL(neigh_app_ns); static int zero; static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); -static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_unres_qlen(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { int size, ret; - ctl_table tmp = *ctl; + struct ctl_table tmp = *ctl; tmp.extra1 = &zero; tmp.extra2 = &unres_qlen_max; diff --git a/net/core/sysctl_net_core.c 
b/net/core/sysctl_net_core.c index 4b48f39582b..637a42e5d58 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,12 +24,12 @@ static int one = 1; #ifdef CONFIG_RPS -static int rps_sock_flow_sysctl(ctl_table *table, int write, +static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned int orig_size, size; int ret, i; - ctl_table tmp = { + struct ctl_table tmp = { .data = &size, .maxlen = sizeof(size), .mode = table->mode @@ -91,7 +91,7 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, #ifdef CONFIG_NET_FLOW_LIMIT static DEFINE_MUTEX(flow_limit_update_mutex); -static int flow_limit_cpu_sysctl(ctl_table *table, int write, +static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -156,7 +156,7 @@ done: return ret; } -static int flow_limit_table_len_sysctl(ctl_table *table, int write, +static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 7d9197063eb..dd0dfb25f4b 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -158,11 +158,11 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU static int min_priority[1]; static int max_priority[] = { 127 }; /* From DECnet spec */ -static int dn_forwarding_proc(ctl_table *, int, +static int dn_forwarding_proc(struct ctl_table *, int, void __user *, size_t *, loff_t *); static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table dn_dev_vars[5]; + struct ctl_table dn_dev_vars[5]; } dn_dev_sysctl = { NULL, { @@ -242,7 +242,7 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } } -static int dn_forwarding_proc(ctl_table *table, int write, +static int dn_forwarding_proc(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t 
*ppos) { diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index a55eeccaa72..5325b541c52 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) return 0; } -static int dn_node_address_handler(ctl_table *table, int write, +static int dn_node_address_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -183,7 +183,7 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } -static int dn_def_dev_handler(ctl_table *table, int write, +static int dn_def_dev_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -246,7 +246,7 @@ static int dn_def_dev_handler(ctl_table *table, int write, return 0; } -static ctl_table dn_table[] = { +static struct ctl_table dn_table[] = { { .procname = "node_address", .maxlen = 7, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3469506c106..8d48c392adc 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1942,7 +1942,7 @@ static void inet_forward_change(struct net *net) } } -static int devinet_conf_proc(ctl_table *ctl, int write, +static int devinet_conf_proc(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -1985,7 +1985,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_sysctl_forward(ctl_table *ctl, int write, +static int devinet_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2028,7 +2028,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, return ret; } -static int ipv4_doint_and_flush(ctl_table *ctl, int write, +static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 
567d84168bd..0a2e0e3e95b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -223,7 +223,7 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -static ctl_table ip_ct_sysctl_table[] = { +static struct ctl_table ip_ct_sysctl_table[] = { { .procname = "ip_conntrack_max", .maxlen = sizeof(int), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 198ea596f2d..f3fa42eac46 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2448,7 +2448,7 @@ static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; -static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, +static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2463,7 +2463,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, return -EINVAL; } -static ctl_table ipv4_route_table[] = { +static struct ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fa2f63fc453..b2c123c44d6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -49,13 +49,13 @@ static void set_local_port_range(int range[2]) } /* Validate changes from /proc interface. */ -static int ipv4_local_port_range(ctl_table *table, int write, +static int ipv4_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2]; - ctl_table tmp = { + struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, @@ -100,7 +100,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig } /* Validate changes from /proc interface. 
*/ -static int ipv4_ping_group_range(ctl_table *table, int write, +static int ipv4_ping_group_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -108,7 +108,7 @@ static int ipv4_ping_group_range(ctl_table *table, int write, int ret; gid_t urange[2]; kgid_t low, high; - ctl_table tmp = { + struct ctl_table tmp = { .data = &urange, .maxlen = sizeof(urange), .mode = table->mode, @@ -135,11 +135,11 @@ static int ipv4_ping_group_range(ctl_table *table, int write, return ret; } -static int proc_tcp_congestion_control(ctl_table *ctl, int write, +static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char val[TCP_CA_NAME_MAX]; - ctl_table tbl = { + struct ctl_table tbl = { .data = val, .maxlen = TCP_CA_NAME_MAX, }; @@ -153,12 +153,12 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, return ret; } -static int proc_tcp_available_congestion_control(ctl_table *ctl, +static int proc_tcp_available_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; + struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); @@ -170,12 +170,12 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, return ret; } -static int proc_allowed_congestion_control(ctl_table *ctl, +static int proc_allowed_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; + struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); @@ -190,7 +190,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int ipv4_tcp_mem(ctl_table *ctl, int write, +static int ipv4_tcp_mem(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -201,7 
+201,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, struct mem_cgroup *memcg; #endif - ctl_table tmp = { + struct ctl_table tmp = { .data = &vec, .maxlen = sizeof(vec), .mode = ctl->mode, @@ -233,10 +233,11 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } -static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) { - ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; + struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; int ret; u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 21010fddb20..80449121afa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4620,13 +4620,13 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL static -int addrconf_sysctl_forward(ctl_table *ctl, int write, +int addrconf_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - ctl_table lctl; + struct ctl_table lctl; int ret; /* @@ -4705,13 +4705,13 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) } static -int addrconf_sysctl_disable(ctl_table *ctl, int write, +int addrconf_sysctl_disable(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - ctl_table lctl; + struct ctl_table lctl; int ret; /* @@ -4733,7 +4733,7 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write, static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table addrconf_vars[DEVCONF_MAX+1]; + struct ctl_table addrconf_vars[DEVCONF_MAX+1]; } addrconf_sysctl __read_mostly = { 
.sysctl_header = NULL, .addrconf_vars = { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4b4890bbe16..7cfc8d28487 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -976,7 +976,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -ctl_table ipv6_icmp_table_template[] = { +struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2b874185ebb..7ca87b37c0e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2790,7 +2790,7 @@ static const struct file_operations rt6_stats_seq_fops = { #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, +int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net; @@ -2805,7 +2805,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, return 0; } -ctl_table ipv6_route_table_template[] = { +struct ctl_table ipv6_route_table_template[] = { { .procname = "flush", .data = &init_net.ipv6.sysctl.flush_delay, diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index e85c48bd404..107b2f1d90a 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,7 +16,7 @@ #include #include -static ctl_table ipv6_table_template[] = { +static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, @@ -27,7 +27,7 @@ static ctl_table ipv6_table_template[] = { { } }; -static ctl_table ipv6_rotable[] = { +static struct ctl_table ipv6_rotable[] = { { .procname = "mld_max_msf", .data = &sysctl_mld_max_msf, diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index de73f6496db..d6a59651767 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -73,7 +73,7 @@ static int min_lap_keepalive_time = 100; /* 100us */ /* For other sysctl, I've no idea of the range. 
Maybe Dag could help * us on that - Jean II */ -static int do_devname(ctl_table *table, int write, +static int do_devname(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -90,7 +90,7 @@ static int do_devname(ctl_table *table, int write, } -static int do_discovery(ctl_table *table, int write, +static int do_discovery(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -111,7 +111,7 @@ static int do_discovery(ctl_table *table, int write, } /* One file */ -static ctl_table irda_table[] = { +static struct ctl_table irda_table[] = { { .procname = "discovery", .data = &sysctl_discovery, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index df05c1c276f..edb88fbcb1b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1575,7 +1575,7 @@ static int zero; static int three = 3; static int -proc_do_defense_mode(ctl_table *table, int write, +proc_do_defense_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; @@ -1596,7 +1596,7 @@ proc_do_defense_mode(ctl_table *table, int write, } static int -proc_do_sync_threshold(ctl_table *table, int write, +proc_do_sync_threshold(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1616,7 +1616,7 @@ proc_do_sync_threshold(ctl_table *table, int write, } static int -proc_do_sync_mode(ctl_table *table, int write, +proc_do_sync_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1634,7 +1634,7 @@ proc_do_sync_mode(ctl_table *table, int write, } static int -proc_do_sync_ports(ctl_table *table, int write, +proc_do_sync_ports(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c 
b/net/netfilter/ipvs/ip_vs_lblc.c index 5ea26bd8774..44595b8ae37 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -118,7 +118,7 @@ struct ip_vs_lblc_table { * IPVS LBLC sysctl table */ #ifdef CONFIG_SYSCTL -static ctl_table vs_vars_table[] = { +static struct ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", .data = NULL, diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 50123c2ab48..876937db0bf 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -299,7 +299,7 @@ struct ip_vs_lblcr_table { * IPVS LBLCR sysctl table */ -static ctl_table vs_vars_table[] = { +static struct ctl_table vs_vars_table[] = { { .procname = "lblcr_expiration", .data = NULL, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index bd700b4013c..f641751dba9 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -408,7 +408,7 @@ static int log_invalid_proto_max = 255; static struct ctl_table_header *nf_ct_netfilter_header; -static ctl_table nf_ct_sysctl_table[] = { +static struct ctl_table nf_ct_sysctl_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, @@ -458,7 +458,7 @@ static ctl_table nf_ct_sysctl_table[] = { #define NET_NF_CONNTRACK_MAX 2089 -static ctl_table nf_ct_netfilter_table[] = { +static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 4b60a87b759..85296d4eac0 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -245,7 +245,7 @@ static const struct file_operations nflog_file_ops = { static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static int nf_log_proc_dostring(ctl_table *table, int write, +static int nf_log_proc_dostring(struct 
ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 42f630b9a69..ba1c368b3f1 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -34,7 +34,7 @@ static int min_reset[] = {0}, max_reset[] = {1}; static struct ctl_table_header *nr_table_header; -static ctl_table nr_table[] = { +static struct ctl_table nr_table[] = { { .procname = "default_path_quality", .data = &sysctl_netrom_default_path_quality, diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index d6bbbbd0af1..c02a8c4bc11 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -61,13 +61,13 @@ void phonet_get_local_port_range(int *min, int *max) } while (read_seqretry(&local_port_range_lock, seq)); } -static int proc_local_port_range(ctl_table *table, int write, +static int proc_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2] = {local_port_range[0], local_port_range[1]}; - ctl_table tmp = { + struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index 7e643bafb4a..e4e41b3afce 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -61,7 +61,7 @@ static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; */ unsigned int rds_ib_sysctl_flow_control = 0; -static ctl_table rds_ib_sysctl_table[] = { +static struct ctl_table rds_ib_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_ib_sysctl_max_send_wr, diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index 5d5ebd576f3..89c91515ed0 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -55,7 +55,7 @@ static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL; unsigned int rds_iw_sysctl_flow_control = 1; -static ctl_table rds_iw_sysctl_table[] = { +static struct ctl_table rds_iw_sysctl_table[] 
= { { .procname = "max_send_wr", .data = &rds_iw_sysctl_max_send_wr, diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index 907214b4c4d..b5cb2aa08f3 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -49,7 +49,7 @@ unsigned int rds_sysctl_max_unacked_bytes = (16 << 20); unsigned int rds_sysctl_ping_enable = 1; -static ctl_table rds_sysctl_rds_table[] = { +static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "reconnect_min_delay_ms", .data = &rds_sysctl_reconnect_min_jiffies, diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 94ca9c2ccd6..89a9278795a 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -24,7 +24,7 @@ static int min_window[] = {1}, max_window[] = {7}; static struct ctl_table_header *rose_table_header; -static ctl_table rose_table[] = { +static struct ctl_table rose_table[] = { { .procname = "restart_request_timeout", .data = &sysctl_rose_restart_request_timeout, diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index bf3c6e8fc40..9a5c4c9edda 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -62,12 +62,12 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; -static int proc_sctp_do_hmac_alg(ctl_table *ctl, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static ctl_table sctp_table[] = { +static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", .data = &sysctl_sctp_mem, @@ -93,7 +93,7 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; -static ctl_table sctp_net_table[] = { +static struct ctl_table sctp_net_table[] = { { .procname = "rto_initial", .data = &init_net.sctp.rto_initial, @@ -300,14 +300,14 @@ static ctl_table sctp_net_table[] = { { /* sentinel */ } }; -static int proc_sctp_do_hmac_alg(ctl_table *ctl, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = 
current->nsproxy->net_ns; char tmp[8]; - ctl_table tbl; + struct ctl_table tbl; int ret; int changed = 0; char *none = "none"; diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index af7d339add9..c99c58e2ee6 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(nlm_debug); #ifdef RPC_DEBUG static struct ctl_table_header *sunrpc_table_header; -static ctl_table sunrpc_table[]; +static struct ctl_table sunrpc_table[]; void rpc_register_sysctl(void) @@ -58,7 +58,7 @@ rpc_unregister_sysctl(void) } } -static int proc_do_xprt(ctl_table *table, int write, +static int proc_do_xprt(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; @@ -73,7 +73,7 @@ static int proc_do_xprt(ctl_table *table, int write, } static int -proc_dodebug(ctl_table *table, int write, +proc_dodebug(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], c, *s; @@ -135,7 +135,7 @@ done: } -static ctl_table debug_table[] = { +static struct ctl_table debug_table[] = { { .procname = "rpc_debug", .data = &rpc_debug, @@ -173,7 +173,7 @@ static ctl_table debug_table[] = { { } }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 8343737e85f..c1b6270262c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -84,7 +84,7 @@ struct workqueue_struct *svc_rdma_wq; * resets the associated statistic to zero. Any read returns it's * current value. 
*/ -static int read_reset_stat(ctl_table *table, int write, +static int read_reset_stat(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -119,7 +119,7 @@ static int read_reset_stat(ctl_table *table, int write, } static struct ctl_table_header *svcrdma_table_header; -static ctl_table svcrdma_parm_table[] = { +static struct ctl_table svcrdma_parm_table[] = { { .procname = "max_requests", .data = &svcrdma_max_requests, @@ -214,7 +214,7 @@ static ctl_table svcrdma_parm_table[] = { { }, }; -static ctl_table svcrdma_table[] = { +static struct ctl_table svcrdma_table[] = { { .procname = "svc_rdma", .mode = 0555, @@ -223,7 +223,7 @@ static ctl_table svcrdma_table[] = { { }, }; -static ctl_table svcrdma_root_table[] = { +static struct ctl_table svcrdma_root_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 794312f22b9..285dc088411 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -86,7 +86,7 @@ static unsigned int max_memreg = RPCRDMA_LAST - 1; static struct ctl_table_header *sunrpc_table_header; -static ctl_table xr_tunables_table[] = { +static struct ctl_table xr_tunables_table[] = { { .procname = "rdma_slot_table_entries", .data = &xprt_rdma_slot_table_entries, @@ -138,7 +138,7 @@ static ctl_table xr_tunables_table[] = { { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ffd50348a50..412de7cfcc8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -87,7 +87,7 @@ static struct ctl_table_header *sunrpc_table_header; * FIXME: changing the UDP slot table size should also resize the UDP * socket buffers for existing UDP transports */ -static ctl_table xs_tunables_table[] = { +static struct ctl_table xs_tunables_table[] = { { .procname = "udp_slot_table_entries", 
.data = &xprt_udp_slot_table_entries, @@ -143,7 +143,7 @@ static ctl_table xs_tunables_table[] = { { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 8800604c93f..b3d515021b7 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -15,7 +15,7 @@ #include -static ctl_table unix_table[] = { +static struct ctl_table unix_table[] = { { .procname = "max_dgram_qlen", .data = &init_net.unx.sysctl_max_dgram_qlen, -- cgit v1.2.3 From 85f16525a2eb66e6092cbd8dcf42371df8334ed0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 11 Jun 2013 15:35:32 -0700 Subject: tcp: properly send new data in fast recovery in first RTT Linux sends new unsent data during disorder and recovery state if all (suspected) lost packets have been retransmitted (RFC5681, section 3.2 step 1 & 2, RFC3517 section 4, NextSeg() Rule 2). One requirement is to keep the receive window about twice the estimated sender's congestion window (tcp_rcv_space_adjust()), assuming the fast retransmits repair the losses in the next round trip. But currently it's not the case on the first round trip in either normal or Fast Open connection, because the initial receive window is identical to (expected) sender's initial congestion window. The fix is to double it. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 13 ++----------- net/ipv4/tcp_output.c | 33 ++++++++++++++++++--------------- 2 files changed, 20 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 907311c9a01..46271cdcf08 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -347,22 +347,13 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) } /* 3. Tuning rcvbuf, when connection enters established state.
*/ - static void tcp_fixup_rcvbuf(struct sock *sk) { u32 mss = tcp_sk(sk)->advmss; - u32 icwnd = TCP_DEFAULT_INIT_RCVWND; int rcvmem; - /* Limit to 10 segments if mss <= 1460, - * or 14600/mss segments, with a minimum of two segments. - */ - if (mss > 1460) - icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - - rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER); - - rcvmem *= icwnd; + rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * + tcp_default_init_rwnd(mss); if (sk->sk_rcvbuf < rcvmem) sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ec335fabd5c..3dd46eab3b0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -181,6 +181,21 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } + +u32 tcp_default_init_rwnd(u32 mss) +{ + /* Initial receive window should be twice of TCP_INIT_CWND to + * enable proper sending of new unset data during fast recovery + * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a + * limit when mss is larger than 1460. + */ + u32 init_rwnd = TCP_INIT_CWND * 2; + + if (mss > 1460) + init_rwnd = max((1460 * init_rwnd) / mss, 2U); + return init_rwnd; +} + /* Determine a window scaling and initial window to offer. * Based on the assumption that the given amount of space * will be offered. Store the results in the tp structure. @@ -230,22 +245,10 @@ void tcp_select_initial_window(int __space, __u32 mss, } } - /* Set initial window to a value enough for senders starting with - * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place - * a limit on the initial window when mss is larger than 1460. 
- */ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = TCP_DEFAULT_INIT_RCVWND; - if (mss > 1460) - init_cwnd = - max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - /* when initializing use the value from init_rcv_wnd - * rather than the default from above - */ - if (init_rcv_wnd) - *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); - else - *rcv_wnd = min(*rcv_wnd, init_cwnd * mss); + if (!init_rcv_wnd) /* Use default unless specified otherwise */ + init_rcv_wnd = tcp_default_init_rwnd(mss); + *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); } /* Set the clamp no higher than max representable value */ -- cgit v1.2.3 From 817cee767523769cbc5ac94e439cde0c21752cbc Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Sun, 19 May 2013 14:23:57 +0300 Subject: mac80211: track AP's beacon rate and give it to the driver Track the AP's beacon rate in the scan BSS data and in the interface configuration to let the drivers know which rate the AP is using. This information may be used by drivers, in our case to let the firmware optimise beacon RX. Signed-off-by: Alexander Bondar Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 8 +++++++- net/mac80211/scan.c | 9 +++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7a6f1a0207e..a4dfb0be53d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -94,6 +94,7 @@ struct ieee80211_bss { #define IEEE80211_MAX_SUPP_RATES 32 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; size_t supp_rates_len; + struct ieee80211_rate *beacon_rate; /* * During association, we save an ERP value from a probe response so diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ad9bb9e10cb..87f2d4df31f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1779,8 +1779,10 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, * probably just won't work at all. 
*/ bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; + bss_conf->beacon_rate = bss->beacon_rate; bss_info_changed |= BSS_CHANGED_BEACON_INFO; } else { + bss_conf->beacon_rate = NULL; bss_conf->dtim_period = 0; } @@ -1903,6 +1905,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.chswitch_timer); sdata->vif.bss_conf.dtim_period = 0; + sdata->vif.bss_conf.beacon_rate = NULL; + ifmgd->have_beacon = false; ifmgd->flags = 0; @@ -2754,8 +2758,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, channel); - if (bss) + if (bss) { ieee80211_rx_bss_put(local, bss); + sdata->vif.bss_conf.beacon_rate = bss->beacon_rate; + } if (!sdata->u.mgd.associated || !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 99b103921a4..1b122a79b0d 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -140,6 +140,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_WMM; } + if (beacon) { + struct ieee80211_supported_band *sband = + local->hw.wiphy->bands[rx_status->band]; + if (!(rx_status->flag & RX_FLAG_HT) && + !(rx_status->flag & RX_FLAG_VHT)) + bss->beacon_rate = + &sband->bitrates[rx_status->rate_idx]; + } + return bss; } -- cgit v1.2.3 From 38745c7414c0d9a0567c5b4a4e056c6b7f807179 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 10 Jun 2013 10:34:14 +0300 Subject: mac80211: Fix VHT bandwidth change event Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 171344d4eb7..97c289414e3 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -396,7 +396,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, new_bw = 
ieee80211_sta_cur_vht_bw(sta); if (new_bw != sta->sta.bandwidth) { sta->sta.bandwidth = new_bw; - changed |= IEEE80211_RC_NSS_CHANGED; + changed |= IEEE80211_RC_BW_CHANGED; } change: -- cgit v1.2.3 From 1095e69f47926db6f1350a9d6a38626521580e87 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Wed, 22 May 2013 11:36:17 +0200 Subject: NFC: NCI: Fix skb->dev usage skb->dev is used for carrying a net_device pointer and not an nci_dev pointer. Remove usage of skb-dev to carry nci_dev and replace it by parameter in nci_recv_frame(), nci_send_frame() and driver send() functions. NfcWilink driver is also updated to use those functions. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/core.c | 17 ++++++----------- net/nfc/nci/data.c | 2 -- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 48ada0ec749..8e0dbbeee9e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -797,12 +797,11 @@ EXPORT_SYMBOL(nci_unregister_device); /** * nci_recv_frame - receive frame from NCI drivers * + * @ndev: The nci device * @skb: The sk_buff to receive */ -int nci_recv_frame(struct sk_buff *skb) +int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) { - struct nci_dev *ndev = (struct nci_dev *) skb->dev; - pr_debug("len %d\n", skb->len); if (!ndev || (!test_bit(NCI_UP, &ndev->flags) && @@ -819,10 +818,8 @@ int nci_recv_frame(struct sk_buff *skb) } EXPORT_SYMBOL(nci_recv_frame); -static int nci_send_frame(struct sk_buff *skb) +static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) { - struct nci_dev *ndev = (struct nci_dev *) skb->dev; - pr_debug("len %d\n", skb->len); if (!ndev) { @@ -833,7 +830,7 @@ static int nci_send_frame(struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. 
*/ skb_orphan(skb); - return ndev->ops->send(skb); + return ndev->ops->send(ndev, skb); } /* Send NCI command */ @@ -861,8 +858,6 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) if (plen) memcpy(skb_put(skb, plen), payload, plen); - skb->dev = (void *) ndev; - skb_queue_tail(&ndev->cmd_q, skb); queue_work(ndev->cmd_wq, &ndev->cmd_work); @@ -894,7 +889,7 @@ static void nci_tx_work(struct work_struct *work) nci_conn_id(skb->data), nci_plen(skb->data)); - nci_send_frame(skb); + nci_send_frame(ndev, skb); mod_timer(&ndev->data_timer, jiffies + msecs_to_jiffies(NCI_DATA_TIMEOUT)); @@ -963,7 +958,7 @@ static void nci_cmd_work(struct work_struct *work) nci_opcode_oid(nci_opcode(skb->data)), nci_plen(skb->data)); - nci_send_frame(skb); + nci_send_frame(ndev, skb); mod_timer(&ndev->cmd_timer, jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT)); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 76c48c5324f..2a9399dd6c6 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -80,8 +80,6 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, nci_mt_set((__u8 *)hdr, NCI_MT_DATA_PKT); nci_pbf_set((__u8 *)hdr, pbf); - - skb->dev = (void *) ndev; } static int nci_queue_tx_data_frags(struct nci_dev *ndev, -- cgit v1.2.3 From 9674da8759df0d6c0d24e1ede6e2a1acdef91e3c Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 29 Apr 2013 17:13:27 +0200 Subject: NFC: Add firmware upload netlink command As several NFC chipsets can have their firmwares upgraded and reflashed, this patchset adds a new netlink command to trigger that the driver loads or flashes a new firmware. This will allow userspace triggered firmware upgrade through netlink. The firmware name or hint is passed as a parameter, and the driver will eventually fetch the firmware binary through the request_firmware API. The cmd can only be executed when the nfc dev is not in use. Actual firmware loading/flashing is an asynchronous operation.
Result of the operation shall send a new event up to user space through the nfc dev multicast socket. During operation, the nfc dev is not openable and thus not usable. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 46 ++++++++++++++++++++++++++++++++++++++++ net/nfc/netlink.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 5 +++++ 3 files changed, 114 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 40d2527693d..eb3cecf1764 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -44,6 +44,47 @@ DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); +int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) +{ + int rc = 0; + + pr_debug("%s do firmware %s\n", dev_name(&dev->dev), firmware_name); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (dev->dev_up) { + rc = -EBUSY; + goto error; + } + + if (!dev->ops->fw_upload) { + rc = -EOPNOTSUPP; + goto error; + } + + dev->fw_upload_in_progress = true; + rc = dev->ops->fw_upload(dev, firmware_name); + if (rc) + dev->fw_upload_in_progress = false; + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +{ + dev->fw_upload_in_progress = false; + + return nfc_genl_fw_upload_done(dev, firmware_name); +} +EXPORT_SYMBOL(nfc_fw_upload_done); + /** * nfc_dev_up - turn on the NFC device * @@ -69,6 +110,11 @@ int nfc_dev_up(struct nfc_dev *dev) goto error; } + if (dev->fw_upload_in_progress) { + rc = -EBUSY; + goto error; + } + if (dev->dev_up) { rc = -EALREADY; goto error; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f0c4d61f37c..1deadad9a28 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -56,6 +56,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, 
[NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, + [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, + .len = NFC_FIRMWARE_NAME_MAXSIZE }, }; static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { @@ -1025,6 +1027,62 @@ exit: return rc; } +static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx; + char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], + sizeof(firmware_name)); + + rc = nfc_fw_upload(dev, firmware_name); + + nfc_put_device(dev); + return rc; +} + +int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_CMD_FW_UPLOAD); + if (!hdr) + goto free_msg; + + if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) || + nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -1084,6 +1142,11 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_llc_sdreq, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_FW_UPLOAD, + .doit = nfc_genl_fw_upload, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index afa1f84ba04..cf0c4816599 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -120,6 +120,11 @@ static inline void nfc_device_iter_exit(struct 
class_dev_iter *iter) class_dev_iter_exit(iter); } +int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name); +int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); + +int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); + int nfc_dev_up(struct nfc_dev *dev); int nfc_dev_down(struct nfc_dev *dev); -- cgit v1.2.3 From 9a695d23aab889273821c91b4132f1ed315b251b Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 29 Apr 2013 17:47:42 +0200 Subject: NFC: HCI: Implement fw_upload ops This is a simple forward to the HCI driver. When driver is done with the operation, it shall directly notify NFC Core by calling nfc_fw_upload_done(). Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- net/nfc/hci/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 91020b210d8..b7e4dac5654 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -779,6 +779,16 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) } } +static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->fw_upload) + return hdev->ops->fw_upload(hdev, firmware_name); + + return -ENOTSUPP; +} + static struct nfc_ops hci_nfc_ops = { .dev_up = hci_dev_up, .dev_down = hci_dev_down, @@ -791,6 +801,7 @@ static struct nfc_ops hci_nfc_ops = { .im_transceive = hci_transceive, .tm_send = hci_tm_send, .check_presence = hci_check_presence, + .fw_upload = hci_fw_upload, }; struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, -- cgit v1.2.3 From a395298c9c96748cbd6acee4cb9a5ba13fbb3ab8 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 25 May 2013 01:21:21 +0200 Subject: NFC: HCI: Follow a positive code path in the HCI ops implementations Exiting on the error case is more typical to the kernel coding style. 
Signed-off-by: Samuel Ortiz --- net/nfc/hci/core.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index b7e4dac5654..d2ef1e2ee0c 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -570,21 +570,21 @@ static int hci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->dep_link_up) - return hdev->ops->dep_link_up(hdev, target, comm_mode, - gb, gb_len); + if (!hdev->ops->dep_link_up) + return 0; - return 0; + return hdev->ops->dep_link_up(hdev, target, comm_mode, + gb, gb_len); } static int hci_dep_link_down(struct nfc_dev *nfc_dev) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->dep_link_down) - return hdev->ops->dep_link_down(hdev); + if (!hdev->ops->dep_link_down) + return 0; - return 0; + return hdev->ops->dep_link_down(hdev); } static int hci_activate_target(struct nfc_dev *nfc_dev, @@ -673,12 +673,12 @@ static int hci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->tm_send) - return hdev->ops->tm_send(hdev, skb); - - kfree_skb(skb); + if (!hdev->ops->tm_send) { + kfree_skb(skb); + return -ENOTSUPP; + } - return -ENOTSUPP; + return hdev->ops->tm_send(hdev, skb); } static int hci_check_presence(struct nfc_dev *nfc_dev, @@ -686,10 +686,10 @@ static int hci_check_presence(struct nfc_dev *nfc_dev, { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->check_presence) - return hdev->ops->check_presence(hdev, target); + if (!hdev->ops->check_presence) + return 0; - return 0; + return hdev->ops->check_presence(hdev, target); } static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err) @@ -783,10 +783,10 @@ static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if 
(hdev->ops->fw_upload) - return hdev->ops->fw_upload(hdev, firmware_name); + if (!hdev->ops->fw_upload) + return -ENOTSUPP; - return -ENOTSUPP; + return hdev->ops->fw_upload(hdev, firmware_name); } static struct nfc_ops hci_nfc_ops = { -- cgit v1.2.3 From 5f121b9a83b499a61ed44e5ba619c7de8f7271ad Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 13 Jun 2013 15:29:38 -0400 Subject: net-rps: fixes for rps flow limit Caught by sparse: - __rcu: missing annotation to sd->flow_limit - __user: direct access in cpumask_scnprintf Also - add endline character when printing bitmap if room in buffer - avoid bucket overflow by reducing FLOW_LIMIT_HISTORY The last item warrants some explanation. The hashtable buckets are subject to overflow if FLOW_LIMIT_HISTORY is larger than or equal to bucket size, since all packets may end up in a single bucket. The current (rather arbitrary) history value of 256 happens to match the buffer size (u8). As a result, with a single flow, the first 128 packets are accepted (correct), the second 128 packets dropped (correct) and then the history[] array has filled, so that each subsequent new packet causes an increment in the bucket for new_flow plus a decrement for old_flow: a steady state. This is fine if packets are dropped, as the steady state goes away as soon as a mix of traffic reappears. But, because the 256th packet overflowed the bucket to 0: no packets are dropped. Instead of explicitly adding an overflow check, this patch changes FLOW_LIMIT_HISTORY to never be able to overflow a single bucket. Reported-by: Fengguang Wu (first item) Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- net/core/sysctl_net_core.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 637a42e5d58..78c746e016a 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -132,6 +132,8 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, write_unlock: mutex_unlock(&flow_limit_update_mutex); } else { + char kbuf[128]; + if (*ppos || !*lenp) { *lenp = 0; goto done; @@ -146,9 +148,20 @@ write_unlock: } rcu_read_unlock(); - len = cpumask_scnprintf(buffer, *lenp, mask); - *lenp = len + 1; - *ppos += len + 1; + len = min(sizeof(kbuf) - 1, *lenp); + len = cpumask_scnprintf(kbuf, len, mask); + if (!len) { + *lenp = 0; + goto done; + } + if (len < *lenp) + kbuf[len++] = '\n'; + if (copy_to_user(buffer, kbuf, len)) { + ret = -EFAULT; + goto done; + } + *lenp = len; + *ppos += len; } done: -- cgit v1.2.3 From ca4ec90b31d1ecf01087c607933cf792057bc8bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Jun 2013 07:58:30 -0700 Subject: htb: reorder struct htb_class fields for performance htb_class structures are big, and source of false sharing on SMP. By carefully splitting them in two parts, we can improve performance. I got 9 % performance increase on a 24 threads machine, with 200 concurrent netperf in TCP_RR mode, using a HTB hierarchy of 4 classes. Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. 
Miller --- net/sched/sch_htb.c | 62 ++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 1a3655a606c..7954e73d118 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -76,23 +76,39 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; -/* interior & leaf nodes; props specific to leaves are marked L: */ +/* interior & leaf nodes; props specific to leaves are marked L: + * To reduce false sharing, place mostly read fields at beginning, + * and mostly written ones at the end. + */ struct htb_class { struct Qdisc_class_common common; - /* general class parameters */ - struct gnet_stats_basic_packed bstats; - struct gnet_stats_queue qstats; + struct psched_ratecfg rate; + struct psched_ratecfg ceil; + s64 buffer, cbuffer;/* token bucket depth/rate */ + s64 mbuffer; /* max wait time */ + int prio; /* these two are used only by leaves... */ + int quantum; /* but stored for parent-to-leaf return */ + + struct tcf_proto *filter_list; /* class attached filters */ + int filter_cnt; + int refcnt; /* usage count of this class */ + + int level; /* our level (see above) */ + unsigned int children; + struct htb_class *parent; /* parent class */ + struct gnet_stats_rate_est64 rate_est; - struct tc_htb_xstats xstats; /* our special stats */ - int refcnt; /* usage count of this class */ - /* topology */ - int level; /* our level (see above) */ - unsigned int children; - struct htb_class *parent; /* parent class */ + /* + * Written often fields + */ + struct gnet_stats_basic_packed bstats; + struct gnet_stats_queue qstats; + struct tc_htb_xstats xstats; /* our special stats */ - int prio; /* these two are used only by leaves... 
*/ - int quantum; /* but stored for parent-to-leaf return */ + /* token bucket parameters */ + s64 tokens, ctokens;/* current number of tokens */ + s64 t_c; /* checkpoint time */ union { struct htb_class_leaf { @@ -111,24 +127,12 @@ struct htb_class { u32 last_ptr_id[TC_HTB_NUMPRIO]; } inner; } un; - struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ - struct rb_node pq_node; /* node for event queue */ - s64 pq_key; - - int prio_activity; /* for which prios are we active */ - enum htb_cmode cmode; /* current mode of the class */ + s64 pq_key; - /* class attached filters */ - struct tcf_proto *filter_list; - int filter_cnt; - - /* token bucket parameters */ - struct psched_ratecfg rate; - struct psched_ratecfg ceil; - s64 buffer, cbuffer; /* token bucket depth/rate */ - s64 mbuffer; /* max wait time */ - s64 tokens, ctokens; /* current number of tokens */ - s64 t_c; /* checkpoint time */ + int prio_activity; /* for which prios are we active */ + enum htb_cmode cmode; /* current mode of the class */ + struct rb_node pq_node; /* node for event queue */ + struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ }; struct htb_sched { -- cgit v1.2.3 From 1d8faf48c74b8329a0322dc4b2a2030ae5003c86 Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Thu, 13 Jun 2013 13:19:10 +0300 Subject: net/core: Add VF link state control Add netlink directives and ndo entry to allow for controlling VF link, which can be in one of three states: Auto - VF link state reflects the PF link state (default) Up - VF link state is up, traffic from VF to VF works even if the actual PF link is down Down - VF link state is down, no traffic from/to this VF, can be of use while configuring the VF Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- net/core/rtnetlink.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 49c14451d8a..9007533867f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -947,6 +947,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_vlan vf_vlan; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; + struct ifla_vf_link_state vf_linkstate; /* * Not all SR-IOV capable drivers support the @@ -956,18 +957,24 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); + /* The default value for VF link state is "auto" + * IFLA_VF_LINK_STATE_AUTO which equals zero + */ + ivi.linkstate = 0; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = - vf_spoofchk.vf = ivi.vf; + vf_spoofchk.vf = + vf_linkstate.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_tx_rate.rate = ivi.tx_rate; vf_spoofchk.setting = ivi.spoofchk; + vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -978,7 +985,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), - &vf_spoofchk)) + &vf_spoofchk) || + nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), + &vf_linkstate)) goto nla_put_failure; nla_nest_end(skb, vf); } @@ -1238,6 +1247,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivs->setting); break; } + case IFLA_VF_LINK_STATE: { + struct ifla_vf_link_state *ivl; + ivl = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + 
break; + } default: err = -EINVAL; break; -- cgit v1.2.3 From 8a00a61b0ef2bfd1b468dd20c0d0b1a94a8f7475 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Wed, 29 May 2013 15:35:02 +0200 Subject: NFC: Add basic NCI over SPI The NFC Forum defines a transport interface based on Serial Peripheral Interface (SPI) for the NFC Controller Interface (NCI). This module implements the SPI transport of NCI, calling SPI module directly to read/write data to NFC controller (NFCC). NFCC driver should provide functions performing device open and close. It should also provide functions asserting/de-asserting interruption to prevent TX/RX race conditions. NFCC driver can also fix a delay between transactions if needed by the hardware. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/Kconfig | 10 ++++ net/nfc/nci/Makefile | 4 +- net/nfc/nci/spi.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 net/nfc/nci/spi.c (limited to 'net') diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index 6d69b5f0f19..2a2416080b4 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -8,3 +8,13 @@ config NFC_NCI Say Y here to compile NCI support into the kernel or say M to compile it as module (nci). + +config NFC_NCI_SPI + depends on NFC_NCI && SPI + bool "NCI over SPI protocol support" + default n + help + NCI (NFC Controller Interface) is a communication protocol between + an NFC Controller (NFCC) and a Device Host (DH). + + Say yes if you use an NCI driver that requires SPI link layer. 
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index cdb3a2e4447..7aeedc43187 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -4,4 +4,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o -nci-objs := core.o data.o lib.o ntf.o rsp.o \ No newline at end of file +nci-objs := core.o data.o lib.o ntf.o rsp.o + +nci-$(CONFIG_NFC_NCI_SPI) += spi.o diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c new file mode 100644 index 00000000000..ebcdba51418 --- /dev/null +++ b/net/nfc/nci/spi.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2013 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +#define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__ + +#include +#include +#include +#include + +#define NCI_SPI_HDR_LEN 4 +#define NCI_SPI_CRC_LEN 2 + +static int nci_spi_open(struct nci_dev *nci_dev) +{ + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + + return ndev->ops->open(ndev); +} + +static int nci_spi_close(struct nci_dev *nci_dev) +{ + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + + return ndev->ops->close(ndev); +} + +static int nci_spi_send(struct nci_dev *nci_dev, struct sk_buff *skb) +{ + return 0; +} + +static struct nci_ops nci_spi_ops = { + .open = nci_spi_open, + .close = nci_spi_close, + .send = nci_spi_send, +}; + +/* ---- Interface to NCI SPI drivers ---- */ + +/** + * nci_spi_allocate_device - allocate a new nci spi device + * + * @spi: SPI device + * @ops: device operations + * @supported_protocols: NFC protocols supported by the device + * @supported_se: NFC Secure Elements supported by the device + * @acknowledge_mode: Acknowledge mode used by the device + * @delay: delay between transactions in us + */ +struct nci_spi_dev *nci_spi_allocate_device(struct spi_device *spi, + struct nci_spi_ops *ops, + u32 supported_protocols, + u32 supported_se, + u8 acknowledge_mode, + unsigned int delay) +{ + struct nci_spi_dev *ndev; + int tailroom = 0; + + if (!ops->open || !ops->close || !ops->assert_int || !ops->deassert_int) + return NULL; + + if (!supported_protocols) + return NULL; + + ndev = devm_kzalloc(&spi->dev, sizeof(struct nci_dev), GFP_KERNEL); + if (!ndev) + return NULL; + + ndev->ops = ops; + ndev->acknowledge_mode = acknowledge_mode; + ndev->xfer_udelay = delay; + + if (acknowledge_mode == NCI_SPI_CRC_ENABLED) + tailroom += NCI_SPI_CRC_LEN; + + ndev->nci_dev = nci_allocate_device(&nci_spi_ops, supported_protocols, + supported_se, NCI_SPI_HDR_LEN, + tailroom); + if (!ndev->nci_dev) + return NULL; + + nci_set_drvdata(ndev->nci_dev, ndev); + + return ndev; +} +EXPORT_SYMBOL_GPL(nci_spi_allocate_device); + +/** + * 
nci_spi_free_device - deallocate nci spi device + * + * @ndev: The nci spi device to deallocate + */ +void nci_spi_free_device(struct nci_spi_dev *ndev) +{ + nci_free_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_free_device); + +/** + * nci_spi_register_device - register a nci spi device in the nfc subsystem + * + * @pdev: The nci spi device to register + */ +int nci_spi_register_device(struct nci_spi_dev *ndev) +{ + return nci_register_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_register_device); + +/** + * nci_spi_unregister_device - unregister a nci spi device in the nfc subsystem + * + * @dev: The nci spi device to unregister + */ +void nci_spi_unregister_device(struct nci_spi_dev *ndev) +{ + nci_unregister_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_unregister_device); -- cgit v1.2.3 From ee9596d467e4d05c77a8c883aeeb5b74d1a3cd31 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Wed, 29 May 2013 15:35:03 +0200 Subject: NFC: Add NCI over SPI send Before any operation, driver interruption is de-asserted to prevent race condition between TX and RX. The NCI over SPI header is added in front of NCI packet. If acknowledged mode is set, CRC-16-CCITT is added to the packet. Then the packet is forwarded to SPI module to be sent. A delay after the transaction is added. This delay is determined by the driver during nci_spi_allocate_device() call and can be 0. After data has been sent, driver interruption is re-asserted. If acknowledged mode is set, nci_spi_send will block until acknowledgment is received. 
Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/spi.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index ebcdba51418..6258461e699 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -20,12 +20,25 @@ #include #include +#include #include #include #define NCI_SPI_HDR_LEN 4 #define NCI_SPI_CRC_LEN 2 +#define NCI_SPI_SEND_TIMEOUT (NCI_CMD_TIMEOUT > NCI_DATA_TIMEOUT ? \ + NCI_CMD_TIMEOUT : NCI_DATA_TIMEOUT) + +#define NCI_SPI_DIRECT_WRITE 0x01 +#define NCI_SPI_DIRECT_READ 0x02 + +#define ACKNOWLEDGE_NONE 0 +#define ACKNOWLEDGE_ACK 1 +#define ACKNOWLEDGE_NACK 2 + +#define CRC_INIT 0xFFFF + static int nci_spi_open(struct nci_dev *nci_dev) { struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); @@ -40,9 +53,65 @@ static int nci_spi_close(struct nci_dev *nci_dev) return ndev->ops->close(ndev); } +static int __nci_spi_send(struct nci_spi_dev *ndev, struct sk_buff *skb) +{ + struct spi_message m; + struct spi_transfer t; + + t.tx_buf = skb->data; + t.len = skb->len; + t.cs_change = 0; + t.delay_usecs = ndev->xfer_udelay; + + spi_message_init(&m); + spi_message_add_tail(&t, &m); + + return spi_sync(ndev->spi, &m); +} + static int nci_spi_send(struct nci_dev *nci_dev, struct sk_buff *skb) { - return 0; + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + unsigned int payload_len = skb->len; + unsigned char *hdr; + int ret; + long completion_rc; + + ndev->ops->deassert_int(ndev); + + /* add the NCI SPI header to the start of the buffer */ + hdr = skb_push(skb, NCI_SPI_HDR_LEN); + hdr[0] = NCI_SPI_DIRECT_WRITE; + hdr[1] = ndev->acknowledge_mode; + hdr[2] = payload_len >> 8; + hdr[3] = payload_len & 0xFF; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + u16 crc; + + crc = crc_ccitt(CRC_INIT, skb->data, skb->len); + *skb_put(skb, 1) = crc >> 8; + *skb_put(skb, 1) = crc & 0xFF; + } + + ret = 
__nci_spi_send(ndev, skb); + + kfree_skb(skb); + ndev->ops->assert_int(ndev); + + if (ret != 0 || ndev->acknowledge_mode == NCI_SPI_CRC_DISABLED) + goto done; + + init_completion(&ndev->req_completion); + completion_rc = + wait_for_completion_interruptible_timeout(&ndev->req_completion, + NCI_SPI_SEND_TIMEOUT); + + if (completion_rc <= 0 || ndev->req_result == ACKNOWLEDGE_NACK) + ret = -EIO; + +done: + return ret; } static struct nci_ops nci_spi_ops = { -- cgit v1.2.3 From 391d8a2da787257aeaf952c974405b53926e3fb3 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Wed, 29 May 2013 15:35:04 +0200 Subject: NFC: Add NCI over SPI receive Before any operation, driver interruption is de-asserted to prevent race condition between TX and RX. Transaction starts by emitting "Direct read" and acknowledged mode bytes. Then packet length is read allowing to allocate correct NCI socket buffer. After that payload is retrieved. A delay after the transaction can be added. This delay is determined by the driver during nci_spi_allocate_device() call and can be 0. If acknowledged mode is set: - CRC of header and payload is checked - if frame reception fails (CRC error): NACK is sent - if received frame has ACK or NACK flag: unblock nci_spi_send() Payload is passed to NCI module. At the end, driver interruption is re-asserted. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/spi.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) (limited to 'net') diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index 6258461e699..70afc387a96 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -26,6 +26,8 @@ #define NCI_SPI_HDR_LEN 4 #define NCI_SPI_CRC_LEN 2 +#define NCI_SPI_ACK_SHIFT 6 +#define NCI_SPI_MSB_PAYLOAD_MASK 0x3F #define NCI_SPI_SEND_TIMEOUT (NCI_CMD_TIMEOUT > NCI_DATA_TIMEOUT ?
\ NCI_CMD_TIMEOUT : NCI_DATA_TIMEOUT) @@ -203,3 +205,175 @@ void nci_spi_unregister_device(struct nci_spi_dev *ndev) nci_unregister_device(ndev->nci_dev); } EXPORT_SYMBOL_GPL(nci_spi_unregister_device); + +static int send_acknowledge(struct nci_spi_dev *ndev, u8 acknowledge) +{ + struct sk_buff *skb; + unsigned char *hdr; + u16 crc; + int ret; + + skb = nci_skb_alloc(ndev->nci_dev, 0, GFP_KERNEL); + + /* add the NCI SPI header to the start of the buffer */ + hdr = skb_push(skb, NCI_SPI_HDR_LEN); + hdr[0] = NCI_SPI_DIRECT_WRITE; + hdr[1] = NCI_SPI_CRC_ENABLED; + hdr[2] = acknowledge << NCI_SPI_ACK_SHIFT; + hdr[3] = 0; + + crc = crc_ccitt(CRC_INIT, skb->data, skb->len); + *skb_put(skb, 1) = crc >> 8; + *skb_put(skb, 1) = crc & 0xFF; + + ret = __nci_spi_send(ndev, skb); + + kfree_skb(skb); + + return ret; +} + +static struct sk_buff *__nci_spi_recv_frame(struct nci_spi_dev *ndev) +{ + struct sk_buff *skb; + struct spi_message m; + unsigned char req[2], resp_hdr[2]; + struct spi_transfer tx, rx; + unsigned short rx_len = 0; + int ret; + + spi_message_init(&m); + req[0] = NCI_SPI_DIRECT_READ; + req[1] = ndev->acknowledge_mode; + tx.tx_buf = req; + tx.len = 2; + tx.cs_change = 0; + spi_message_add_tail(&tx, &m); + rx.rx_buf = resp_hdr; + rx.len = 2; + rx.cs_change = 1; + spi_message_add_tail(&rx, &m); + ret = spi_sync(ndev->spi, &m); + + if (ret) + return NULL; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) + rx_len = ((resp_hdr[0] & NCI_SPI_MSB_PAYLOAD_MASK) << 8) + + resp_hdr[1] + NCI_SPI_CRC_LEN; + else + rx_len = (resp_hdr[0] << 8) | resp_hdr[1]; + + skb = nci_skb_alloc(ndev->nci_dev, rx_len, GFP_KERNEL); + if (!skb) + return NULL; + + spi_message_init(&m); + rx.rx_buf = skb_put(skb, rx_len); + rx.len = rx_len; + rx.cs_change = 0; + rx.delay_usecs = ndev->xfer_udelay; + spi_message_add_tail(&rx, &m); + ret = spi_sync(ndev->spi, &m); + + if (ret) + goto receive_error; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + *skb_push(skb, 1) = resp_hdr[1]; + 
*skb_push(skb, 1) = resp_hdr[0]; + } + + return skb; + +receive_error: + kfree_skb(skb); + + return NULL; +} + +static int nci_spi_check_crc(struct sk_buff *skb) +{ + u16 crc_data = (skb->data[skb->len - 2] << 8) | + skb->data[skb->len - 1]; + int ret; + + ret = (crc_ccitt(CRC_INIT, skb->data, skb->len - NCI_SPI_CRC_LEN) + == crc_data); + + skb_trim(skb, skb->len - NCI_SPI_CRC_LEN); + + return ret; +} + +static u8 nci_spi_get_ack(struct sk_buff *skb) +{ + u8 ret; + + ret = skb->data[0] >> NCI_SPI_ACK_SHIFT; + + /* Remove NFCC part of the header: ACK, NACK and MSB payload len */ + skb_pull(skb, 2); + + return ret; +} + +/** + * nci_spi_recv_frame - receive frame from NCI SPI drivers + * + * @ndev: The nci spi device + * Context: can sleep + * + * This call may only be used from a context that may sleep. The sleep + * is non-interruptible, and has no timeout. + * + * It returns zero on success, else a negative error code. + */ +int nci_spi_recv_frame(struct nci_spi_dev *ndev) +{ + struct sk_buff *skb; + int ret = 0; + + ndev->ops->deassert_int(ndev); + + /* Retrieve frame from SPI */ + skb = __nci_spi_recv_frame(ndev); + if (!skb) { + ret = -EIO; + goto done; + } + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + if (!nci_spi_check_crc(skb)) { + send_acknowledge(ndev, ACKNOWLEDGE_NACK); + goto done; + } + + /* In case of acknowledged mode: if ACK or NACK received, + * unblock completion of latest frame sent. 
+ */ + ndev->req_result = nci_spi_get_ack(skb); + if (ndev->req_result) + complete(&ndev->req_completion); + } + + /* If there is no payload (ACK/NACK only frame), + * free the socket buffer + */ + if (skb->len == 0) { + kfree_skb(skb); + goto done; + } + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) + send_acknowledge(ndev, ACKNOWLEDGE_ACK); + + /* Forward skb to NCI core layer */ + ret = nci_recv_frame(ndev->nci_dev, skb); + +done: + ndev->ops->assert_int(ndev); + + return ret; +} +EXPORT_SYMBOL_GPL(nci_spi_recv_frame); -- cgit v1.2.3 From 0b456c418a5595b9d67f300c9ac6a2441e774603 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 7 May 2013 19:22:11 +0200 Subject: NFC: Remove the static supported_se field Supported secure elements are typically found during a discovery process initiated when the NFC controller is up and running. For a given NFC chipset there can be many configurations (embedded SE or not, with or without a SIM card wired to the NFC controller SWP interface, etc...) and thus driver code will never know before hand which SEs are available. So we remove this field, it will be replaced by a real SE discovery mechanism. 
Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 2 -- net/nfc/hci/core.c | 3 +-- net/nfc/nci/core.c | 2 -- net/nfc/nci/spi.c | 3 +-- net/nfc/netlink.c | 1 - 5 files changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index eb3cecf1764..334954a1d6e 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -832,7 +832,6 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, - u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -850,7 +849,6 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; - dev->supported_se = supported_se; dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index d2ef1e2ee0c..9c8a63d341d 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -808,7 +808,6 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, - u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -834,7 +833,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 8e0dbbeee9e..145bad15e11 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -658,7 +658,6 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, - __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -681,7 +680,6 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = 
nfc_allocate_device(&nci_nfc_ops, supported_protocols, - supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index 70afc387a96..c7cf37ba729 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -162,8 +162,7 @@ struct nci_spi_dev *nci_spi_allocate_device(struct spi_device *spi, tailroom += NCI_SPI_CRC_LEN; ndev->nci_dev = nci_allocate_device(&nci_spi_ops, supported_protocols, - supported_se, NCI_SPI_HDR_LEN, - tailroom); + NCI_SPI_HDR_LEN, tailroom); if (!ndev->nci_dev) return NULL; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 1deadad9a28..fdbc662c564 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -444,7 +444,6 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || - nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; -- cgit v1.2.3 From 0a946301c2d3eac8673e556df820c0b6023ac6c3 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 11:57:06 +0200 Subject: NFC: Extend and fix the internal secure element API Secure elements need to be discovered after enabling the NFC controller. This is typically done by the NCI core and the HCI drivers (HCI does not specify how to discover SEs, it is left to the specific drivers). Also, the SE enable/disable API explicitly takes a SE index as its argument.
Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 7 +++++++ net/nfc/hci/core.c | 33 +++++++++++++++++++++++++++++++++ net/nfc/nci/core.c | 18 ++++++++++++++++++ 3 files changed, 58 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 334954a1d6e..a43a56d7f4b 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -126,6 +126,13 @@ int nfc_dev_up(struct nfc_dev *dev) if (!rc) dev->dev_up = true; + /* We have to enable the device before discovering SEs */ + if (dev->ops->discover_se) { + rc = dev->ops->discover_se(dev); + if (!rc) + pr_warn("SE discovery failed\n"); + } + error: device_unlock(&dev->dev); return rc; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 9c8a63d341d..7b1c186736e 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -692,6 +692,36 @@ static int hci_check_presence(struct nfc_dev *nfc_dev, return hdev->ops->check_presence(hdev, target); } +static int hci_discover_se(struct nfc_dev *nfc_dev) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->discover_se) + return hdev->ops->discover_se(hdev); + + return 0; +} + +static int hci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->enable_se) + return hdev->ops->enable_se(hdev, se_idx); + + return 0; +} + +static int hci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->disable_se) + return hdev->ops->enable_se(hdev, se_idx); + + return 0; +} + static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err) { mutex_lock(&hdev->msg_tx_mutex); @@ -802,6 +832,9 @@ static struct nfc_ops hci_nfc_ops = { .tm_send = hci_tm_send, .check_presence = hci_check_presence, .fw_upload = hci_fw_upload, + .discover_se = hci_discover_se, + .enable_se = hci_enable_se, + .disable_se = hci_disable_se, }; struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, diff --git a/net/nfc/nci/core.c 
b/net/nfc/nci/core.c index 145bad15e11..b943d46a164 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -636,6 +636,21 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, return rc; } +static int nci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + return 0; +} + +static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + return 0; +} + +static int nci_discover_se(struct nfc_dev *nfc_dev) +{ + return 0; +} + static struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, @@ -646,6 +661,9 @@ static struct nfc_ops nci_nfc_ops = { .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, .im_transceive = nci_transceive, + .enable_se = nci_enable_se, + .disable_se = nci_disable_se, + .discover_se = nci_discover_se, }; /* ---- Interface to NCI drivers ---- */ -- cgit v1.2.3 From fed7c25ec0d4894edfc36bbe5c5231e52f45483a Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 15:28:38 +0200 Subject: NFC: Add secure elements addition and removal API This API will allow NFC drivers to add and remove the secure elements they know about or detect. Typically this should be called (asynchronously or not) from the driver or the host interface stack detect_se hook. 
Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index a43a56d7f4b..dacadfbcace 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -760,6 +760,49 @@ inline void nfc_driver_failure(struct nfc_dev *dev, int err) } EXPORT_SYMBOL(nfc_driver_failure); +int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) +{ + struct nfc_se *se, *n; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) + return -EALREADY; + + se = kzalloc(sizeof(struct nfc_se), GFP_KERNEL); + if (!se) + return -ENOMEM; + + se->idx = se_idx; + se->type = type; + se->state = NFC_SE_DISABLED; + INIT_LIST_HEAD(&se->list); + + list_add(&se->list, &dev->secure_elements); + + return 0; +} +EXPORT_SYMBOL(nfc_add_se); + +int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) +{ + struct nfc_se *se, *n; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) { + list_del(&se->list); + kfree(se); + + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL(nfc_remove_se); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); @@ -856,9 +899,9 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; - dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; + INIT_LIST_HEAD(&dev->secure_elements); nfc_genl_data_init(&dev->genl_data); -- cgit v1.2.3 From 2757c3723c3d2b13e3a8bfaa034826f64e9cca43 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 15:47:37 +0200 Subject: NFC: Send netlink events for secure elements additions and removals When an NFC driver or host controller stack discovers a secure element, it will call 
nfc_add_se(). In order for userspace applications to use these secure elements, a netlink event will then be sent with the SE index and its type. With that information userspace applications can decide whether or not to enable SEs, through their indexes. Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 14 +++++++++++++ net/nfc/netlink.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 3 +++ 3 files changed, 80 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index dacadfbcace..bb5f16cfc20 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -763,6 +763,7 @@ EXPORT_SYMBOL(nfc_driver_failure); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) { struct nfc_se *se, *n; + int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); @@ -781,6 +782,14 @@ int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) list_add(&se->list, &dev->secure_elements); + rc = nfc_genl_se_added(dev, se_idx, type); + if (rc < 0) { + list_del(&se->list); + kfree(se); + + return rc; + } + return 0; } EXPORT_SYMBOL(nfc_add_se); @@ -788,11 +797,16 @@ EXPORT_SYMBOL(nfc_add_se); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) { struct nfc_se *se, *n; + int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); list_for_each_entry_safe(se, n, &dev->secure_elements, list) if (se->idx == se_idx) { + rc = nfc_genl_se_removed(dev, se_idx); + if (rc < 0) + return rc; + list_del(&se->list); kfree(se); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index fdbc662c564..8a11a3a27e6 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -426,6 +426,69 @@ free_msg: return rc; } +int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_ADDED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX,
dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_REMOVED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index cf0c4816599..a6aeee094aa 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -94,6 +94,9 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev); int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); +int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); +int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) -- cgit v1.2.3 From ee656e9d0993144f4e4ad261aefeeaab9554cd3f Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 15:53:29 +0200 Subject: NFC: Remove and free all SEs when releasing an NFC device Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index bb5f16cfc20..5b60b9ddfc8 
100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -820,11 +820,19 @@ EXPORT_SYMBOL(nfc_remove_se); static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); + struct nfc_se *se, *n; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) { + nfc_genl_se_removed(dev, se->idx); + list_del(&se->list); + kfree(se); + } + kfree(dev); } -- cgit v1.2.3 From c531c9ec2969860c98a8a47f501c4874278388d3 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 16:15:32 +0200 Subject: NFC: Add secure element enablement internal API Called via netlink, this API will enable or disable a specific secure element. When a secure element is enabled, it will handle card emulation and more generically ISO-DEP target mode, i.e. all target mode cases except for p2p target mode. Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- net/nfc/nfc.h | 3 ++ 2 files changed, 109 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 5b60b9ddfc8..dc96a83aa6a 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -528,6 +528,108 @@ error: return rc; } +static struct nfc_se *find_se(struct nfc_dev *dev, u32 se_idx) +{ + struct nfc_se *se, *n; + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) + return se; + + return NULL; +} + +int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) +{ + + struct nfc_se *se; + int rc; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (!dev->dev_up) { + rc = -ENODEV; + goto error; + } + + if (dev->polling) { + rc = -EBUSY; + goto error; + } + + if (!dev->ops->enable_se || !dev->ops->disable_se) { + rc = -EOPNOTSUPP; + goto error; + } + + se = find_se(dev, se_idx); + 
if (!se) { + rc = -EINVAL; + goto error; + } + + if (se->type == NFC_SE_ENABLED) { + rc = -EALREADY; + goto error; + } + + rc = dev->ops->enable_se(dev, se_idx); + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) +{ + + struct nfc_se *se; + int rc; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (!dev->dev_up) { + rc = -ENODEV; + goto error; + } + + if (!dev->ops->enable_se || !dev->ops->disable_se) { + rc = -EOPNOTSUPP; + goto error; + } + + se = find_se(dev, se_idx); + if (!se) { + rc = -EINVAL; + goto error; + } + + if (se->type == NFC_SE_DISABLED) { + rc = -EALREADY; + goto error; + } + + rc = dev->ops->disable_se(dev, se_idx); + +error: + device_unlock(&dev->dev); + return rc; +} + int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); @@ -762,14 +864,14 @@ EXPORT_SYMBOL(nfc_driver_failure); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) { - struct nfc_se *se, *n; + struct nfc_se *se; int rc; pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); - list_for_each_entry_safe(se, n, &dev->secure_elements, list) - if (se->idx == se_idx) - return -EALREADY; + se = find_se(dev, se_idx); + if (se) + return -EALREADY; se = kzalloc(sizeof(struct nfc_se), GFP_KERNEL); if (!se) diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index a6aeee094aa..ee85a1fc1b2 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -147,4 +147,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx); int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); +int nfc_enable_se(struct nfc_dev *dev, u32 se_idx); +int nfc_disable_se(struct nfc_dev *dev, u32 se_idx); + #endif /* __LOCAL_NFC_H */ -- cgit v1.2.3 From 
be0856535c64697685af47c5bf2be9f36ab5ca08 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 10 May 2013 17:07:32 +0200 Subject: NFC: Add secure element enablement netlink API Enabling or disabling an NFC accessible secure element through netlink requires giving both an NFC controller and a secure element indexes. Once enabled the secure element will handle card emulation once polling starts. Signed-off-by: Samuel Ortiz --- net/nfc/netlink.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'net') diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 8a11a3a27e6..b05ad909778 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1145,6 +1145,52 @@ free_msg: return -EMSGSIZE; } +static int nfc_genl_enable_se(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx, se_idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_enable_se(dev, se_idx); + + nfc_put_device(dev); + return rc; +} + +static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx, se_idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_disable_se(dev, se_idx); + + nfc_put_device(dev); + return rc; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -1209,6 +1255,16 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_fw_upload, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_ENABLE_SE, + .doit = nfc_genl_enable_se, + .policy = 
nfc_genl_policy, + }, + { + .cmd = NFC_CMD_DISABLE_SE, + .doit = nfc_genl_disable_se, + .policy = nfc_genl_policy, + }, }; -- cgit v1.2.3 From 58e3dd1558f56e95e7077a63340bb33e7aa42946 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 4 Jun 2013 11:34:50 +0200 Subject: NFC: Rename nfc_llcp_disconnect() to nfc_llcp_send_disconnect() nfc_llcp_send_disconnect() already exists but is not used. nfc_llcp_disconnect() naming is not consistent with other PDU sending functions. This patch removes nfc_llcp_send_disconnect() and renames nfc_llcp_disconnect() Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp.h | 1 - net/nfc/llcp_commands.c | 22 +--------------------- net/nfc/llcp_sock.c | 4 ++-- 3 files changed, 3 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index ff8c434f7df..ac16ebe3069 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -246,7 +246,6 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); -int nfc_llcp_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_symm(struct nfc_dev *dev); int nfc_llcp_send_connect(struct nfc_llcp_sock *sock); int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index c1b23eef83c..1017894807c 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -339,7 +339,7 @@ static struct sk_buff *llcp_allocate_pdu(struct nfc_llcp_sock *sock, return skb; } -int nfc_llcp_disconnect(struct nfc_llcp_sock *sock) +int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) { struct sk_buff *skb; struct nfc_dev *dev; @@ -630,26 +630,6 @@ int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason) return 0; } -int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) -{ - struct 
sk_buff *skb; - struct nfc_llcp_local *local; - - pr_debug("Send DISC\n"); - - local = sock->local; - if (local == NULL) - return -ENODEV; - - skb = llcp_allocate_pdu(sock, LLCP_PDU_DISC, 0); - if (skb == NULL) - return -ENOMEM; - - skb_queue_head(&local->tx_queue, skb); - - return 0; -} - int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, struct msghdr *msg, size_t len) { diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 380253eccb7..03fd3162cee 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -603,7 +603,7 @@ static int llcp_sock_release(struct socket *sock) /* Send a DISC */ if (sk->sk_state == LLCP_CONNECTED) - nfc_llcp_disconnect(llcp_sock); + nfc_llcp_send_disconnect(llcp_sock); if (sk->sk_state == LLCP_LISTEN) { struct nfc_llcp_sock *lsk, *n; @@ -614,7 +614,7 @@ static int llcp_sock_release(struct socket *sock) accept_sk = &lsk->sk; lock_sock(accept_sk); - nfc_llcp_disconnect(lsk); + nfc_llcp_send_disconnect(lsk); nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); -- cgit v1.2.3 From 17f7ae16aef1f58bc4af4c7a16b8778a91a30255 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Tue, 4 Jun 2013 11:34:51 +0200 Subject: NFC: Keep socket alive until the DISC PDU is actually sent This patch keeps the socket alive and therefore does not remove it from the sockets list in the local until the DISC PDU has been actually sent. Otherwise we would reply with DM PDUs before sending the DISC one. 
Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp.h | 1 + net/nfc/llcp_core.c | 7 +++++++ net/nfc/llcp_sock.c | 7 +++++++ 3 files changed, 15 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index ac16ebe3069..71f649e5ef4 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -19,6 +19,7 @@ enum llcp_state { LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ + LLCP_DISCONNECTING, LLCP_CLOSED, LLCP_BOUND, LLCP_LISTEN, diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 158bdbf668c..1c4c048e0a1 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -730,6 +730,13 @@ static void nfc_llcp_tx_work(struct work_struct *work) DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + if (ptype == LLCP_PDU_DISC && sk != NULL && + sk->sk_state == LLCP_DISCONNECTING) { + nfc_llcp_sock_unlink(&local->sockets, sk); + sock_orphan(sk); + sock_put(sk); + } + if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 03fd3162cee..47e7acfc023 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -626,6 +626,13 @@ static int llcp_sock_release(struct socket *sock) release_sock(sk); + /* Keep this sock alive and therefore do not remove it from the sockets + * list until the DISC PDU has been actually sent. Otherwise we would + * reply with DM PDUs before sending the DISC one. + */ + if (sk->sk_state == LLCP_DISCONNECTING) + return err; + if (sock->type == SOCK_RAW) nfc_llcp_sock_unlink(&local->raw_sockets, sk); else -- cgit v1.2.3 From f1b79dc8915ebf176d6f1fcfc4fee001b6d5ca46 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Wed, 5 Jun 2013 17:15:59 +0200 Subject: NFC: Fix a potential memory leak In nfc_llcp_tx_work() the sk_buff is not freed when the llcp_sock is null and the PDU is an I one. 
Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- net/nfc/llcp_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 1c4c048e0a1..44730f0edfd 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -719,6 +719,7 @@ static void nfc_llcp_tx_work(struct work_struct *work) llcp_sock = nfc_llcp_sock(sk); if (llcp_sock == NULL && nfc_llcp_ptype(skb) == LLCP_PDU_I) { + kfree_skb(skb); nfc_llcp_send_symm(local->dev); } else { struct sk_buff *copy_skb = NULL; -- cgit v1.2.3 From b4011239a08e7e6c2c6e970dfa9e8ecb73139261 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 3 May 2013 18:29:30 +0200 Subject: NFC: llcp: Fix non blocking sockets connections Without the new LLCP_CONNECTING state, non blocking sockets will be woken up with a POLLHUP right after calling connect() because their state is stuck at LLCP_CLOSED. That prevents userspace from implementing any proper non blocking socket based NFC p2p client. 
Cc: stable@vger.kernel.org Signed-off-by: Samuel Ortiz --- net/nfc/llcp.h | 1 + net/nfc/llcp_sock.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index 71f649e5ef4..f4d48b57ea1 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -19,6 +19,7 @@ enum llcp_state { LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ + LLCP_CONNECTING, LLCP_DISCONNECTING, LLCP_CLOSED, LLCP_BOUND, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 47e7acfc023..d308402b67d 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -571,7 +571,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if (sock_writeable(sk)) + if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -729,14 +729,16 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, if (ret) goto sock_unlink; + sk->sk_state = LLCP_CONNECTING; + ret = sock_wait_state(sk, LLCP_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); - if (ret) + if (ret && ret != -EINPROGRESS) goto sock_unlink; release_sock(sk); - return 0; + return ret; sock_unlink: nfc_llcp_put_ssap(local, llcp_sock->ssap); -- cgit v1.2.3 From 2635a4bdfa8d513c531fa7d7a0ccafc1d6a9ff85 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 28 May 2013 15:03:17 +0200 Subject: NFC: llcp: Do not send pending Tx frames when the remote is not ready When we receive a RNR, the remote is busy processing the last received frame. We set a local flag for that, and we should send a SYMM when it is set instead of sending any pending frame. 
Signed-off-by: Samuel Ortiz --- net/nfc/llcp_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 44730f0edfd..47746a088f8 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -721,6 +721,9 @@ static void nfc_llcp_tx_work(struct work_struct *work) if (llcp_sock == NULL && nfc_llcp_ptype(skb) == LLCP_PDU_I) { kfree_skb(skb); nfc_llcp_send_symm(local->dev); + } else if (llcp_sock && !llcp_sock->remote_ready) { + skb_queue_head(&local->tx_queue, skb); + nfc_llcp_send_symm(local->dev); } else { struct sk_buff *copy_skb = NULL; u8 ptype = nfc_llcp_ptype(skb); -- cgit v1.2.3 From f768b34017cbe6e7690686514f682f076bb1f477 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 28 May 2013 15:41:32 +0200 Subject: NFC: llcp: Set the LLC Link Management well known service bit In order to advertise our LLCP support properly and to follow the LLCP specs requirements, we need to initialize the WKS (Well-Known Services) bitfield to 1 as SAP 0 is the only mandatory supported service. Signed-off-by: Samuel Ortiz --- net/nfc/llcp_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 47746a088f8..d45bcbbc9f7 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1590,6 +1590,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) local->lto = 150; /* 1500 ms */ local->rw = LLCP_MAX_RW; local->miux = cpu_to_be16(LLCP_MAX_MIUX); + local->local_wks = 0x1; /* LLC Link Management */ nfc_llcp_build_gb(local); -- cgit v1.2.3 From 4ca546e5545b7345b69e9331ecd53a1e4c6f7fe1 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 3 Jun 2013 12:10:04 +0200 Subject: NFC: llcp: Fix the well known services endianness The WKS (Well Known Services) bitmask should be transmitted in big endian order. Picky implementations will refuse to establish an LLCP link when the WKS bit 0 is not set to 1. 
The vast majority of implementations out there are not that picky though... Signed-off-by: Samuel Ortiz --- net/nfc/llcp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index d45bcbbc9f7..81cd3416c7d 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -537,6 +537,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) u8 *lto_tlv, lto_length; u8 *wks_tlv, wks_length; u8 *miux_tlv, miux_length; + __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; int ret = 0; @@ -549,8 +550,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); - wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&local->local_wks, 2, - &wks_length); + wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); gb_len += wks_length; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, -- cgit v1.2.3 From f44f340883388b57fe03edfb0982e038e57a992c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 13 May 2013 08:15:26 -0700 Subject: openvswitch: Immediately exit on error in ovs_vport_cmd_set(). It is an error to try to change the type of a vport using the set command. However, while we check that this is an error, we still proceed to allocate memory which then gets freed immediately. This stops processing after noticing the error, which does not actually fix a bug but is more correct. 
Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d12d6b8b5e8..748aa97cbfb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1812,10 +1812,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - err = 0; if (a[OVS_VPORT_ATTR_TYPE] && - nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) + nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; + goto exit_unlock; + } reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!reply) { @@ -1823,10 +1824,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - if (!err && a[OVS_VPORT_ATTR_OPTIONS]) + if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); - if (err) - goto exit_free; + if (err) + goto exit_free; + } if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); -- cgit v1.2.3 From cbd531bebb02bc6c0fc3619a2cfc32f7d8843b18 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 13 May 2013 08:16:29 -0700 Subject: openvswitch: Remove unused get_config vport op. The get_config vport op is left over from old compatibility code, it is neither used nor implemented any more. 
Signed-off-by: Jesse Gross --- net/openvswitch/vport-netdev.h | 1 - net/openvswitch/vport.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index a3cb3a32cd7..dd298b5c5cd 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -39,6 +39,5 @@ netdev_vport_priv(const struct vport *vport) } const char *ovs_netdev_get_name(const struct vport *); -const char *ovs_netdev_get_config(const struct vport *); #endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 68a377bc084..26c594b1a47 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -123,8 +123,6 @@ struct vport_parms { * existing vport to a &struct sk_buff. May be %NULL for a vport that does not * have any configuration. * @get_name: Get the device's name. - * @get_config: Get the device's configuration. - * May be null if the device does not have an ifindex. * @send: Send a packet on the device. Returns the length of the packet sent. */ struct vport_ops { @@ -139,7 +137,6 @@ struct vport_ops { /* Called with rcu_read_lock or ovs_mutex. */ const char *(*get_name)(const struct vport *); - void (*get_config)(const struct vport *, void *); int (*send)(struct vport *, struct sk_buff *); }; -- cgit v1.2.3 From 91b7514cdff406ad8f63d09b74f664c37bed2e01 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 13 May 2013 08:22:34 -0700 Subject: openvswitch: Unify vport error stats handling. Following patch changes vport->send return type so that vport layer can do error accounting. 
Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/vport-netdev.c | 5 ++--- net/openvswitch/vport.c | 9 +++++++-- net/openvswitch/vport.h | 3 ++- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 4f01c6d2ffa..43712217a37 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -170,7 +170,7 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", netdev_vport->dev->name, packet_length(skb), mtu); - goto error; + goto drop; } skb->dev = netdev_vport->dev; @@ -179,9 +179,8 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) return len; -error: +drop: kfree_skb(skb); - ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); return 0; } diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 720623190ea..7f20f6d1be9 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -351,7 +351,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) { int sent = vport->ops->send(vport, skb); - if (likely(sent)) { + if (likely(sent > 0)) { struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); @@ -360,7 +360,12 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) stats->tx_packets++; stats->tx_bytes += sent; u64_stats_update_end(&stats->syncp); - } + } else if (sent < 0) { + ovs_vport_record_error(vport, VPORT_E_TX_ERROR); + kfree_skb(skb); + } else + ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); + return sent; } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 26c594b1a47..1cef5cd3be4 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -123,7 +123,8 @@ struct vport_parms { * existing vport to a &struct sk_buff. May be %NULL for a vport that does not * have any configuration. * @get_name: Get the device's name. 
- * @send: Send a packet on the device. Returns the length of the packet sent. + * @send: Send a packet on the device. Returns the length of the packet sent, + * zero for dropped packets or negative for error. */ struct vport_ops { enum ovs_vport_type type; -- cgit v1.2.3 From 34d94f2102fb361030569eb983751ed80742f6e9 Mon Sep 17 00:00:00 2001 From: Lorand Jakab Date: Mon, 3 Jun 2013 10:01:14 -0700 Subject: openvswitch: fix variable names in comment Signed-off-by: Lorand Jakab Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b15321a2228..33df0913358 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -590,10 +590,10 @@ out: * - skb->network_header: just past the Ethernet header, or just past the * VLAN header, to the first byte of the Ethernet payload. * - * - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6 + * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 * on output, then just past the IP header, if one is present and * of a correct length, otherwise the same as skb->network_header. - * For other key->dl_type values it is left untouched. + * For other key->eth.type values it is left untouched. */ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, int *key_lenp) -- cgit v1.2.3 From af7841636bb29575fe27faaeb351146e87217717 Mon Sep 17 00:00:00 2001 From: Andy Hill Date: Fri, 7 Jun 2013 16:53:50 -0700 Subject: openvswitch: Fix misspellings in comments and docs. 
Flagged with: https://github.com/lyda/misspell-check Run with: git ls-files | misspellings -f - Signed-off-by: Andy Hill Signed-off-by: Jesse Gross --- net/openvswitch/vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 7f20f6d1be9..176d449351e 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -376,7 +376,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) * @err_type: one of enum vport_err_type types to indicate the error type * * If using the vport generic stats layer indicate that an error of the given - * type has occured. + * type has occurred. */ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) { -- cgit v1.2.3 From b34df5e805a6e98cae0bc5bc80c1b52d9ff811de Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 13 Jun 2013 11:11:44 -0700 Subject: openvswitch: make skb->csum consistent with rest of networking stack. Following patch keeps skb->csum correct across ovs. 
Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 4 ++++ net/openvswitch/flow.c | 3 +++ net/openvswitch/vport-internal_dev.c | 1 + net/openvswitch/vport-netdev.c | 2 ++ net/openvswitch/vport.h | 7 +++++++ 5 files changed, 17 insertions(+) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 894b6cbdd92..596d6373399 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -130,9 +130,13 @@ static int set_eth_addr(struct sk_buff *skb, if (unlikely(err)) return err; + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); + ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + return 0; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 33df0913358..fca483360ce 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -618,6 +618,9 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. 
+ */ if (vlan_tx_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 84e0a037918..e284c7e1fec 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -221,6 +221,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); netif_rx(skb); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 43712217a37..40de815b421 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -49,6 +49,8 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) return; skb_push(skb, ETH_HLEN); + ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + ovs_vport_receive(vport, skb); return; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 1cef5cd3be4..293278c4c2d 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -192,4 +192,11 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; +static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); +} + #endif /* vport.h */ -- cgit v1.2.3 From 93d8fd1514b6862c3370ea92be3f3b4216e0bf8f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 13 Jun 2013 11:11:32 -0700 Subject: openvswitch: Simplify interface ovs_flow_metadata_from_nlattrs() This is not a functional change; it is just code cleanup.
Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 5 +---- net/openvswitch/flow.c | 22 ++++++++++------------ net/openvswitch/flow.h | 4 ++-- 3 files changed, 13 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 748aa97cbfb..0f783d9fa00 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -739,10 +739,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, - &flow->key.phy.skb_mark, - &flow->key.phy.in_port, - a[OVS_PACKET_ATTR_KEY]); + err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index fca483360ce..093c191d4fc 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -1125,10 +1125,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, /** * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. - * @priority: receives the skb priority - * @mark: receives the skb mark - * @in_port: receives the extracted input port. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. * * This parses a series of Netlink attributes that form a flow key, which must @@ -1136,15 +1134,15 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. 
*/ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, - const struct nlattr *attr) +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, + const struct nlattr *attr) { const struct nlattr *nla; int rem; - *in_port = DP_MAX_PORTS; - *priority = 0; - *mark = 0; + flow->key.phy.in_port = DP_MAX_PORTS; + flow->key.phy.priority = 0; + flow->key.phy.skb_mark = 0; nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); @@ -1155,17 +1153,17 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, switch (type) { case OVS_KEY_ATTR_PRIORITY: - *priority = nla_get_u32(nla); + flow->key.phy.priority = nla_get_u32(nla); break; case OVS_KEY_ATTR_IN_PORT: if (nla_get_u32(nla) >= DP_MAX_PORTS) return -EINVAL; - *in_port = nla_get_u32(nla); + flow->key.phy.in_port = nla_get_u32(nla); break; case OVS_KEY_ATTR_SKB_MARK: - *mark = nla_get_u32(nla); + flow->key.phy.skb_mark = nla_get_u32(nla); break; } } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 0875fde65b9..2a83e2141f0 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -141,8 +141,8 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, - const struct nlattr *); +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, + const struct nlattr *attr); #define MAX_ACTIONS_BUFSIZE (16 * 1024) #define TBL_MIN_BUCKETS 1024 -- cgit v1.2.3 From f9e42b853523cda0732022c2e0473c183f7aec65 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Jun 2013 18:24:04 +0200 Subject: net: sctp: sideeffect: throw BUG if primary_path is NULL This clearly states a BUG somewhere in the SCTP code as e.g. fixed once in f28156335 ("sctp: Use correct sideffect command in duplicate cookie handling"). 
If this ever happens, throw a trace in the sideeffect engine where assocs clearly must have a primary_path assigned. When in sctp_seq_dump_local_addrs() also throw a WARN and bail out since we do not need to panic for printing this one asterisk. Also, it will avoid the not so obvious case when primary != NULL test passes and at a later point in time triggering a NULL ptr dereference caused by primary. While at it, also fix up the white space. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/proc.c | 12 +++++++++--- net/sctp/sm_sideeffect.c | 5 ++++- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 4e45ee35d0d..0c83162a6bf 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -134,9 +134,15 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo struct sctp_af *af; if (epb->type == SCTP_EP_TYPE_ASSOCIATION) { - asoc = sctp_assoc(epb); - peer = asoc->peer.primary_path; - primary = &peer->saddr; + asoc = sctp_assoc(epb); + + peer = asoc->peer.primary_path; + if (unlikely(peer == NULL)) { + WARN(1, "Association %p with NULL primary path!", asoc); + return; + } + + primary = &peer->saddr; } rcu_read_lock(); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8aab894aeab..ff91f47b023 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -864,6 +864,7 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; + BUG_ON(asoc->peer.primary_path == NULL); sctp_unhash_established(asoc); sctp_association_free(asoc); } @@ -1274,8 +1275,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_uncork(&asoc->outqueue); local_cork = 0; } - asoc = cmd->obj.asoc; + /* Register with the endpoint. 
*/ + asoc = cmd->obj.asoc; + BUG_ON(asoc->peer.primary_path == NULL); sctp_endpoint_add_asoc(ep, asoc); sctp_hash_established(asoc); break; -- cgit v1.2.3 From 405426f6ca8ac2d8d5b1f8eb9285452d44222781 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Jun 2013 18:24:05 +0200 Subject: net: sctp: sctp_sf_do_prm_asoc: do SCTP_CMD_INIT_CHOOSE_TRANSPORT first While this currently cannot trigger any NULL pointer dereference in sctp_seq_dump_local_addrs(), it is better to change the order of commands to prevent a future bug from happening. Although we first add SCTP_CMD_NEW_ASOC and then set the SCTP_CMD_INIT_CHOOSE_TRANSPORT, it is okay for now, since this primitive is only called by sctp_connect() or sctp_sendmsg() with sctp_assoc_add_peer() set first. However, let's take this precaution and first set the transport and then add it to the association hashlist, to prevent anything from possibly triggering this in the future. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index de1a0138317..b3d18685651 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4632,16 +4632,16 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, if (!repl) goto nomem; + /* Choose transport for INIT. */ + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); + /* Cast away the const modifier, as we want to just * rerun it through as a sideffect. */ my_asoc = (struct sctp_association *)asoc; sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(my_asoc)); - /* Choose transport for INIT. */ - sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, - SCTP_CHUNK(repl)); - /* After sending the INIT, "A" starts the T1-init timer and * enters the COOKIE-WAIT state. 
*/ -- cgit v1.2.3 From c164b8381496ca797c37671fe510a264412ccae5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Jun 2013 18:24:06 +0200 Subject: net: sctp: minor: remove variable in sctp_init_sock It's only used this one time, so we could remove it as well. This is valid and also makes it more explicit/obvious that in case of error the sp->ep is NULL here, i.e. for the sctp_destroy_sock() check that was recently added. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f631c5ff4db..510dc79a32a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3862,7 +3862,6 @@ out: SCTP_STATIC int sctp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); - struct sctp_endpoint *ep; struct sctp_sock *sp; SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); @@ -3971,11 +3970,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) * change the data structure relationships, this may still * be useful for storing pre-connect address information. */ - ep = sctp_endpoint_new(sk, GFP_KERNEL); - if (!ep) + sp->ep = sctp_endpoint_new(sk, GFP_KERNEL); + if (!sp->ep) return -ENOMEM; - sp->ep = ep; sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); -- cgit v1.2.3 From 2e0c9e7911465b29daf85f7de97949004bf7b31c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Jun 2013 18:24:07 +0200 Subject: net: sctp: sctp_association_init: put refs in reverse order In case we need to bail out for whatever reason during assoc init, we call sctp_endpoint_put() and then sock_put(); however, we've held both refs in reverse, non-symmetric order, so first sctp_endpoint_hold() and then sock_hold(). Reverse this, so that in an error case we have sock_put() and then sctp_endpoint_put(). 
Actually shouldn't matter too much, since both cleanup paths do the right thing, but that way, it is more consistent with the rest of the code. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 91cfd8f94a1..756025c98e8 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -86,10 +86,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; - sctp_endpoint_hold(asoc->ep); - - /* Hold the sock. */ asoc->base.sk = (struct sock *)sk; + + sctp_endpoint_hold(asoc->ep); sock_hold(asoc->base.sk); /* Initialize the common base substructure. */ @@ -343,8 +342,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; fail_init: - sctp_endpoint_put(asoc->ep); sock_put(asoc->base.sk); + sctp_endpoint_put(asoc->ep); return NULL; } -- cgit v1.2.3 From eb6db622825b2028df74f490b8c36887cf3c2f50 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 14 Jun 2013 16:33:25 +0300 Subject: net: change sysctl_net_ll_poll into an unsigned int There is no reason for sysctl_net_ll_poll to be an unsigned long. Change it into an unsigned int. Fix the proc handler. Signed-off-by: Eliezer Tamir Signed-off-by: David S. 
Miller --- net/core/sysctl_net_core.c | 4 ++-- net/socket.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 78c746e016a..62702c2053d 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -302,9 +302,9 @@ static struct ctl_table net_core_table[] = { { .procname = "low_latency_poll", .data = &sysctl_net_ll_poll, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_doulongvec_minmax + .proc_handler = proc_dointvec }, #endif #endif /* CONFIG_NET */ diff --git a/net/socket.c b/net/socket.c index 21fd29f63ed..caaffa14e87 100644 --- a/net/socket.c +++ b/net/socket.c @@ -107,7 +107,7 @@ #include #ifdef CONFIG_NET_LL_RX_POLL -unsigned long sysctl_net_ll_poll __read_mostly; +unsigned int sysctl_net_ll_poll __read_mostly; EXPORT_SYMBOL_GPL(sysctl_net_ll_poll); #endif -- cgit v1.2.3 From 9a3c71aa802499e0b1db2788ccc75a56c5f00555 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 14 Jun 2013 16:33:35 +0300 Subject: net: convert low latency sockets to sched_clock() Use sched_clock() instead of get_cycles(). We can use sched_clock() because we don't care much about accuracy. Remove the dependency on X86_TSC. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index d6a9ce6e180..e591668fb38 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -245,7 +245,6 @@ config NETPRIO_CGROUP config NET_LL_RX_POLL bool "Low Latency Receive Poll" - depends on X86_TSC default n ---help--- Support Low Latency Receive Queue Poll. -- cgit v1.2.3 From 89bf1b5a683df497c572c4d3bd3f9c9aa919d773 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 14 Jun 2013 16:33:46 +0300 Subject: net: remove NET_LL_RX_POLL config menu Remove NET_LL_RX_POLL from the config menu. Change default to y. 
Busy polling still needs to be enabled at run time. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/Kconfig | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index e591668fb38..51da8394384 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -244,15 +244,8 @@ config NETPRIO_CGROUP a per-interface basis config NET_LL_RX_POLL - bool "Low Latency Receive Poll" - default n - ---help--- - Support Low Latency Receive Queue Poll. - (For network card drivers which support this option.) - When waiting for data in read or poll call directly into the the device driver - to flush packets which may be pending on the device queues into the stack. - - If unsure, say N. + boolean + default y config BQL boolean -- cgit v1.2.3 From dafcc4380deec21d160c31411f33c8813f67f517 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 14 Jun 2013 16:33:57 +0300 Subject: net: add socket option for low latency polling adds a socket option for low latency polling. This allows overriding the global sysctl value with a per-socket one. Unexport sysctl_net_ll_poll since for now it's not needed in modules. Signed-off-by: Eliezer Tamir Signed-off-by: David S. 
Miller --- net/core/sock.c | 20 ++++++++++++++++++++ net/socket.c | 1 - 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 788c0da5eed..1e744b12fda 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -913,6 +913,19 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; +#ifdef CONFIG_NET_LL_RX_POLL + case SO_LL: + /* allow unprivileged users to decrease the value */ + if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) + ret = -EPERM; + else { + if (val < 0) + ret = -EINVAL; + else + sk->sk_ll_usec = val; + } + break; +#endif default: ret = -ENOPROTOOPT; break; @@ -1170,6 +1183,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; +#ifdef CONFIG_NET_LL_RX_POLL + case SO_LL: + v.val = sk->sk_ll_usec; + break; +#endif + default: return -ENOPROTOOPT; } @@ -2288,6 +2307,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) #ifdef CONFIG_NET_LL_RX_POLL sk->sk_napi_id = 0; + sk->sk_ll_usec = sysctl_net_ll_poll; #endif /* diff --git a/net/socket.c b/net/socket.c index caaffa14e87..3eec3f76b49 100644 --- a/net/socket.c +++ b/net/socket.c @@ -108,7 +108,6 @@ #ifdef CONFIG_NET_LL_RX_POLL unsigned int sysctl_net_ll_poll __read_mostly; -EXPORT_SYMBOL_GPL(sysctl_net_ll_poll); #endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -- cgit v1.2.3 From cc79dd1ba9c1021c2ac6ae200a65ec38ee8db351 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:37 -0400 Subject: tipc: change socket buffer overflow control to respect sk_rcvbuf As per feedback from the netdev community, we change the buffer overflow protection algorithm in receiving sockets so that it always respects the nominal upper limit set in sk_rcvbuf. 
Instead of scaling up from a small sk_rcvbuf value, which leads to violation of the configured sk_rcvbuf limit, we now calculate the weighted per-message limit by scaling down from a much bigger value, still in the same field, according to the importance priority of the received message. To allow for administrative tunability of the socket receive buffer size, we create a tipc_rmem sysctl variable to allow the user to configure an even bigger value via sysctl command. It is a size of three (min/default/max) to be consistent with things like tcp_rmem. By default, the value initialized in tipc_rmem[1] is equal to the receive socket size needed by a TIPC_CRITICAL_IMPORTANCE message. This value is also set as the default value of sk_rcvbuf. Originally-by: Jon Maloy Cc: Neil Horman Cc: Jon Maloy [Ying: added sysctl variation to Jon's original patch] Signed-off-by: Ying Xue [PG: don't compile sysctl.c if not config'd; add Documentation] Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/Makefile | 1 + net/tipc/core.c | 12 +++++++++-- net/tipc/core.h | 9 ++++++++ net/tipc/port.h | 2 ++ net/tipc/socket.c | 19 +++++++++-------- net/tipc/sysctl.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 11 deletions(-) create mode 100644 net/tipc/sysctl.c (limited to 'net') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 4df8e02d900..02636d0a90c 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -11,3 +11,4 @@ tipc-y += addr.o bcast.o bearer.o config.o \ socket.o log.o eth_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o +tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/core.c b/net/tipc/core.c index 7ec2c1eb94f..b0e42a08729 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -39,6 +39,7 @@ #include "name_table.h" #include "subscr.h" #include "config.h" +#include "port.h" #include @@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly; int tipc_max_ports __read_mostly; int tipc_net_id 
__read_mostly; int tipc_remote_management __read_mostly; - +int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ /** * tipc_buf_acquire - creates a TIPC message buffer @@ -118,6 +119,7 @@ static void tipc_core_stop(void) tipc_nametbl_stop(); tipc_ref_table_stop(); tipc_socket_stop(); + tipc_unregister_sysctl(); } /** @@ -142,13 +144,14 @@ static int tipc_core_start(void) res = tipc_netlink_start(); if (!res) res = tipc_socket_init(); + if (!res) + res = tipc_register_sysctl(); if (res) tipc_core_stop(); return res; } - static int __init tipc_init(void) { int res; @@ -160,6 +163,11 @@ static int __init tipc_init(void) tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; + sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT; + res = tipc_core_start(); if (res) pr_err("Unable to start in single node mode\n"); diff --git a/net/tipc/core.h b/net/tipc/core.h index 0207db04179..fe7f2b7c19f 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly; extern int tipc_max_ports __read_mostly; extern int tipc_net_id __read_mostly; extern int tipc_remote_management __read_mostly; +extern int sysctl_tipc_rmem[3] __read_mostly; /* * Other global variables @@ -97,6 +98,14 @@ extern void tipc_netlink_stop(void); extern int tipc_socket_init(void); extern void tipc_socket_stop(void); +#ifdef CONFIG_SYSCTL +extern int tipc_register_sysctl(void); +extern void tipc_unregister_sysctl(void); +#else +#define tipc_register_sysctl() 0 +#define tipc_unregister_sysctl() +#endif + /* * TIPC timer and signal code */ diff --git a/net/tipc/port.h b/net/tipc/port.h index fb66e2e5f4d..2485649c408 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -43,6 +43,8 @@ #include "node_subscr.h" #define TIPC_FLOW_CONTROL_WIN 512 +#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ + 
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, struct sk_buff **buf, unsigned char const *data, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 515ce38e4f4..aba4255f297 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -43,8 +43,6 @@ #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ struct tipc_sock { @@ -203,6 +201,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; + sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tipc_sk(sk)->p = tp_ptr; @@ -1233,10 +1232,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) * For all connectionless messages, by default new queue limits are * as belows: * - * TIPC_LOW_IMPORTANCE (5MB) - * TIPC_MEDIUM_IMPORTANCE (10MB) - * TIPC_HIGH_IMPORTANCE (20MB) - * TIPC_CRITICAL_IMPORTANCE (40MB) + * TIPC_LOW_IMPORTANCE (4 MB) + * TIPC_MEDIUM_IMPORTANCE (8 MB) + * TIPC_HIGH_IMPORTANCE (16 MB) + * TIPC_CRITICAL_IMPORTANCE (32 MB) * * Returns overload limit according to corresponding message importance */ @@ -1246,9 +1245,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) unsigned int limit; if (msg_connected(msg)) - limit = CONN_OVERLOAD_LIMIT; + limit = sysctl_tipc_rmem[2]; else - limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); + limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << + msg_importance(msg); return limit; } @@ -1847,7 +1847,8 @@ static const struct net_proto_family tipc_family_ops = { static struct proto tipc_proto = { .name = "TIPC", .owner = THIS_MODULE, - .obj_size = sizeof(struct tipc_sock) + .obj_size = sizeof(struct 
tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem }; /** diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c new file mode 100644 index 00000000000..f3fef93325a --- /dev/null +++ b/net/tipc/sysctl.c @@ -0,0 +1,64 @@ +/* + * net/tipc/sysctl.c: sysctl interface to TIPC subsystem + * + * Copyright (c) 2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" + +#include + +static struct ctl_table_header *tipc_ctl_hdr; + +static struct ctl_table tipc_table[] = { + { + .procname = "tipc_rmem", + .data = &sysctl_tipc_rmem, + .maxlen = sizeof(sysctl_tipc_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +int tipc_register_sysctl(void) +{ + tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table); + if (tipc_ctl_hdr == NULL) + return -ENOMEM; + return 0; +} + +void tipc_unregister_sysctl(void) +{ + unregister_net_sysctl_table(tipc_ctl_hdr); +} -- cgit v1.2.3 From 5d21cb70db0122507cd18f58b4a9112583c1e075 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 17 Jun 2013 10:54:38 -0400 Subject: tipc: allow implicit connect for stream sockets TIPC's implied connect feature, aka piggyback connect, allows applications to save one syscall and all SYN/SYN-ACK signalling overhead when setting up a connection. Until now, this has only been supported for SEQPACKET sockets. Here, we make it possible to use this feature even with stream sockets. At the connecting side, the connection is completed when the first data message arrives from the accepting peer. This means that we must allow the connecting user to call blocking recv() before the socket has reached state SS_CONNECTED. So we must relax the state machine check at recv_stream(), and allow the recv() call even if socket is in state SS_CONNECTING.
Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index aba4255f297..d5fa708f037 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -518,8 +518,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, res = -EISCONN; goto exit; } - if ((tport->published) || - ((sock->type == SOCK_STREAM) && (total_len != 0))) { + if (tport->published) { res = -EOPNOTSUPP; goto exit; } @@ -1010,8 +1009,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, lock_sock(sk); - if (unlikely((sock->state == SS_UNCONNECTED) || - (sock->state == SS_CONNECTING))) { + if (unlikely((sock->state == SS_UNCONNECTED))) { res = -ENOTCONN; goto exit; } -- cgit v1.2.3 From c5fa7b3cf3cb22e4ac60485fc2dc187fe012910f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:39 -0400 Subject: tipc: introduce new TIPC server infrastructure TIPC has two internal servers, one providing a subscription service for topology events, and another providing the configuration interface. These servers have previously been running in BH context, accessing the TIPC-port (aka native) API directly. Apart from these servers, even the TIPC socket implementation is partially built on this API. As this API may simultaneously be called via different paths and in different contexts, a complex and costly lock policy is required in order to protect TIPC internal resources. To eliminate the need for this complex lock policy, we introduce a new, generic service API that uses kernel sockets for message passing instead of the native API. Once the topology and configuration servers are converted to use this new service, all code pertaining to the native API can be removed.
This entails a significant reduction in code amount and complexity, and opens up for a complete rework of the locking policy in TIPC. The new service also solves another problem: As the current topology server works in BH context, it cannot easily be blocked when sending of events fails due to congestion. In such cases events may have to be silently dropped, something that is unacceptable. Therefore, the new service keeps a dedicated outbound queue receiving messages from BH context. Once messages are inserted into this queue, we will immediately schedule a work from a special workqueue. This way, messages/events from the topology server are in reality sent in process context, and the server can block if necessary. Analogously, there is a new workqueue for receiving messages. Once a notification about an arriving message is received in BH context, we schedule a work from the receive workqueue to do the job of receiving the message in process context. As both sending and receive messages are now finished in processes, subscribed events cannot be dropped any more. As of this commit, this new server infrastructure is built, but not actually yet called by the existing TIPC code, but since the conversion changes required in order to use it are significant, the addition is kept here as a separate commit. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/Makefile | 2 +- net/tipc/core.h | 8 +- net/tipc/server.c | 596 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/server.h | 94 +++++++++ net/tipc/socket.c | 99 ++++++++- 5 files changed, 789 insertions(+), 10 deletions(-) create mode 100644 net/tipc/server.c create mode 100644 net/tipc/server.h (limited to 'net') diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 02636d0a90c..b282f7130d2 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -8,7 +8,7 @@ tipc-y += addr.o bcast.o bearer.o config.o \ core.o handler.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ - socket.o log.o eth_media.o + socket.o log.o eth_media.o server.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/core.h b/net/tipc/core.h index fe7f2b7c19f..be72f8cebc5 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -1,8 +1,8 @@ /* * net/tipc/core.h: Include file for TIPC global declarations * - * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005-2007, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2013 Ericsson AB + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -97,6 +97,10 @@ extern int tipc_netlink_start(void); extern void tipc_netlink_stop(void); extern int tipc_socket_init(void); extern void tipc_socket_stop(void); +extern int tipc_sock_create_local(int type, struct socket **res); +extern void tipc_sock_release_local(struct socket *sock); +extern int tipc_sock_accept_local(struct socket *sock, + struct socket **newsock, int flags); #ifdef CONFIG_SYSCTL extern int tipc_register_sysctl(void); diff --git a/net/tipc/server.c b/net/tipc/server.c new file mode 100644 index 00000000000..19da5abe0fa --- /dev/null +++ b/net/tipc/server.c @@ -0,0 +1,596 @@ +/* + * net/tipc/server.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include "core.h" +#include + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +#define CF_CONNECTED 1 + +#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @rwork: receive work item + * @usr_data: user-specified field + * @rx_action: what to do when connection socket is active + * @outqueue: pointer to first outbound message in queue + * @outqueue_lock: control access to the outqueue + * @outqueue: list of connection objects for its server + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_server *server; + struct work_struct rwork; + int (*rx_action) (struct tipc_conn *con); + void *usr_data; + struct list_head outqueue; + spinlock_t outqueue_lock; + struct work_struct swork; +}; + +/* An entry waiting to be sent */ +struct outqueue_entry { + struct list_head list; + struct kvec iov; + struct sockaddr_tipc dest; +}; + +static void tipc_recv_work(struct work_struct *work); +static void tipc_send_work(struct work_struct *work); +static void 
tipc_clean_outqueues(struct tipc_conn *con); + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_server *s = con->server; + + if (con->sock) { + tipc_sock_release_local(con->sock); + con->sock = NULL; + } + + tipc_clean_outqueues(con); + + if (con->conid) + s->tipc_conn_shutdown(con->conid, con->usr_data); + + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (con) + conn_get(con); + spin_unlock_bh(&s->idr_lock); + return con; +} + +static void sock_data_ready(struct sock *sk, int unused) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void sock_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) +{ + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_data_ready = sock_data_ready; + sk->sk_write_space = sock_write_space; + sk->sk_user_data = con; + + con->sock = sock; + + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_unregister_callbacks(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + 
sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_close_conn(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + + tipc_unregister_callbacks(con); + + /* We shouldn't flush pending works as we may be in the + * thread. In fact the races with pending rx/tx work structs + * are harmless for us here as we have already deleted this + * connection from server connection list and set + * sk->sk_user_data to 0 before releasing connection object. + */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); + } +} + +static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + spin_lock_init(&con->outqueue_lock); + INIT_WORK(&con->swork, tipc_send_work); + INIT_WORK(&con->rwork, tipc_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + + return con; +} + +static int tipc_receive_from_sock(struct tipc_conn *con) +{ + struct msghdr msg = {}; + struct tipc_server *s = con->server; + struct sockaddr_tipc addr; + struct kvec iov; + void *buf; + int ret; + + buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC); + if (!buf) { + ret = -ENOMEM; + goto out_close; + } + + iov.iov_base = buf; + iov.iov_len = s->max_rcvbuf_size; + msg.msg_name = &addr; + ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, + MSG_DONTWAIT); + if (ret <= 0) { + kmem_cache_free(s->rcvbuf_cache, 
buf); + goto out_close; + } + + s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); + + kmem_cache_free(s->rcvbuf_cache, buf); + + return 0; + +out_close: + if (ret != -EWOULDBLOCK) + tipc_close_conn(con); + else if (ret == 0) + /* Don't return success if we really got EOF */ + ret = -EAGAIN; + + return ret; +} + +static int tipc_accept_from_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = con->sock; + struct socket *newsock; + struct tipc_conn *newcon; + int ret; + + ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK); + if (ret < 0) + return ret; + + newcon = tipc_alloc_conn(con->server); + if (IS_ERR(newcon)) { + ret = PTR_ERR(newcon); + sock_release(newsock); + return ret; + } + + newcon->rx_action = tipc_receive_from_sock; + tipc_register_callbacks(newsock, newcon); + + /* Notify that new connection is incoming */ + newcon->usr_data = s->tipc_conn_new(newcon->conid); + + /* Wake up receive process in case of 'SYN+' message */ + newsock->sk->sk_data_ready(newsock->sk, 0); + return ret; +} + +static struct socket *tipc_create_listen_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = NULL; + int ret; + + ret = tipc_sock_create_local(s->type, &sock); + if (ret < 0) + return NULL; + ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, + (char *)&s->imp, sizeof(s->imp)); + if (ret < 0) + goto create_err; + ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr)); + if (ret < 0) + goto create_err; + + switch (s->type) { + case SOCK_STREAM: + case SOCK_SEQPACKET: + con->rx_action = tipc_accept_from_sock; + + ret = kernel_listen(sock, 0); + if (ret < 0) + goto create_err; + break; + case SOCK_DGRAM: + case SOCK_RDM: + con->rx_action = tipc_receive_from_sock; + break; + default: + pr_err("Unknown socket type %d\n", s->type); + goto create_err; + } + return sock; + +create_err: + sock_release(sock); + con->sock = NULL; + return NULL; +} + +static 
int tipc_open_listening_sock(struct tipc_server *s) +{ + struct socket *sock; + struct tipc_conn *con; + + con = tipc_alloc_conn(s); + if (IS_ERR(con)) + return PTR_ERR(con); + + sock = tipc_create_listen_sock(con); + if (!sock) + return -EINVAL; + + tipc_register_callbacks(sock, con); + return 0; +} + +static struct outqueue_entry *tipc_alloc_entry(void *data, int len) +{ + struct outqueue_entry *entry; + void *buf; + + entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC); + if (!entry) + return NULL; + + buf = kmalloc(len, GFP_ATOMIC); + if (!buf) { + kfree(entry); + return NULL; + } + + memcpy(buf, data, len); + entry->iov.iov_base = buf; + entry->iov.iov_len = len; + + return entry; +} + +static void tipc_free_entry(struct outqueue_entry *e) +{ + kfree(e->iov.iov_base); + kfree(e); +} + +static void tipc_clean_outqueues(struct tipc_conn *con) +{ + struct outqueue_entry *e, *safe; + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len) +{ + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (!con) + return -EINVAL; + + e = tipc_alloc_entry(data, len); + if (!e) { + conn_put(con); + return -ENOMEM; + } + + if (addr) + memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc)); + + spin_lock_bh(&con->outqueue_lock); + list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (test_bit(CF_CONNECTED, &con->flags)) + if (!queue_work(s->send_wq, &con->swork)) + conn_put(con); + + return 0; +} + +void tipc_conn_terminate(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (con) { + tipc_close_conn(con); + conn_put(con); + } +} + +static void tipc_send_to_sock(struct tipc_conn *con) +{ + int count = 0; + 
struct tipc_server *s = con->server; + struct outqueue_entry *e; + struct msghdr msg; + int ret; + + spin_lock_bh(&con->outqueue_lock); + while (1) { + e = list_entry(con->outqueue.next, struct outqueue_entry, + list); + if ((struct list_head *) e == &con->outqueue) + break; + spin_unlock_bh(&con->outqueue_lock); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + + if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { + msg.msg_name = &e->dest; + msg.msg_namelen = sizeof(struct sockaddr_tipc); + } + ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, + e->iov.iov_len); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + goto out; + } else if (ret < 0) { + goto send_err; + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +out: + return; + +send_err: + tipc_close_conn(con); +} + +static void tipc_recv_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while (test_bit(CF_CONNECTED, &con->flags)) { + if (con->rx_action(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +static void tipc_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (test_bit(CF_CONNECTED, &con->flags)) + tipc_send_to_sock(con); + + conn_put(con); +} + +static void tipc_work_stop(struct tipc_server *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +static int tipc_work_start(struct tipc_server *s) +{ + s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); + if 
(!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +int tipc_server_start(struct tipc_server *s) +{ + int ret; + + spin_lock_init(&s->idr_lock); + idr_init(&s->conn_idr); + s->idr_in_use = 0; + + s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!s->rcvbuf_cache) + return -ENOMEM; + + ret = tipc_work_start(s); + if (ret < 0) { + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } + s->enabled = 1; + + return tipc_open_listening_sock(s); +} + +void tipc_server_stop(struct tipc_server *s) +{ + struct tipc_conn *con; + int total = 0; + int id; + + if (!s->enabled) + return; + + s->enabled = 0; + spin_lock_bh(&s->idr_lock); + for (id = 0; total < s->idr_in_use; id++) { + con = idr_find(&s->conn_idr, id); + if (con) { + total++; + spin_unlock_bh(&s->idr_lock); + tipc_close_conn(con); + spin_lock_bh(&s->idr_lock); + } + } + spin_unlock_bh(&s->idr_lock); + + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + idr_destroy(&s->conn_idr); +} diff --git a/net/tipc/server.h b/net/tipc/server.h new file mode 100644 index 00000000000..98b23f20bc0 --- /dev/null +++ b/net/tipc/server.h @@ -0,0 +1,94 @@ +/* + * net/tipc/server.h: Include file for TIPC server code + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_SERVER_H +#define _TIPC_SERVER_H + +#include "core.h" + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_server - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @rcvbuf_cache: memory cache of server receive buffer + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @max_rcvbuf_size: maximum permitted receive message length + * @tipc_conn_new: callback will be called when new connection is incoming + * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_recvmsg: callback will be called when message arrives + * @saddr: TIPC server address + * @name: server name + * @imp: message importance + * @type: socket type + * @enabled: identify whether server is launched or not + */ +struct tipc_server { + struct idr conn_idr; + spinlock_t idr_lock; + int idr_in_use; + struct kmem_cache *rcvbuf_cache; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + int max_rcvbuf_size; + void *(*tipc_conn_new) (int conid); + void (*tipc_conn_shutdown) (int conid, void *usr_data); + void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len); + struct sockaddr_tipc *saddr; + const char name[TIPC_SERVER_NAME_LEN]; + int imp; + int type; + int enabled; +}; + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len); + +/** + * tipc_conn_terminate - terminate connection with server + * + * Note: Must call it in process context since it might sleep + */ +void tipc_conn_terminate(struct tipc_server *s, int conid); + +int tipc_server_start(struct tipc_server *s); + +void tipc_server_stop(struct tipc_server *s); + +#endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d5fa708f037..bd8e2cdecee 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2,7 +2,7 @@ * 
net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, 2012 Ericsson AB - * Copyright (c) 2004-2008, 2010-2012, Wind River Systems + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,12 +63,15 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); static void tipc_data_ready(struct sock *sk, int len); static void tipc_write_space(struct sock *sk); +static int release(struct socket *sock); +static int accept(struct socket *sock, struct socket *new_sock, int flags); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; static struct proto tipc_proto; +static struct proto tipc_proto_kern; static int sockets_enabled; @@ -141,7 +144,7 @@ static void reject_rx_queue(struct sock *sk) } /** - * tipc_create - create a TIPC socket + * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) * @sock: pre-allocated socket structure * @protocol: protocol indicator (must be 0) @@ -152,8 +155,8 @@ static void reject_rx_queue(struct sock *sk) * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct net *net, struct socket *sock, int protocol, - int kern) +static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, + int kern) { const struct proto_ops *ops; socket_state state; @@ -183,7 +186,11 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, } /* Allocate socket's protocol area */ - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + if (!kern) + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + else + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern); + if (sk == NULL) return -ENOMEM; @@ -218,6 +225,78 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, return 0; } +/** + * 
tipc_sock_create_local - create TIPC socket from inside TIPC module + * @type: socket type - SOCK_RDM or SOCK_SEQPACKET + * + * We cannot use sock_creat_kern here because it bumps module user count. + * Since socket owner and creator is the same module we must make sure + * that module count remains zero for module local sockets, otherwise + * we cannot do rmmod. + * + * Returns 0 on success, errno otherwise + */ +int tipc_sock_create_local(int type, struct socket **res) +{ + int rc; + struct sock *sk; + + rc = sock_create_lite(AF_TIPC, type, 0, res); + if (rc < 0) { + pr_err("Failed to create kernel socket\n"); + return rc; + } + tipc_sk_create(&init_net, *res, 0, 1); + + sk = (*res)->sk; + + return 0; +} + +/** + * tipc_sock_release_local - release socket created by tipc_sock_create_local + * @sock: the socket to be released. + * + * Module reference count is not incremented when such sockets are created, + * so we must keep it from being decremented when they are released. + */ +void tipc_sock_release_local(struct socket *sock) +{ + release(sock); + sock->ops = NULL; + sock_release(sock); +} + +/** + * tipc_sock_accept_local - accept a connection on a socket created + * with tipc_sock_create_local. Use this function to avoid that + * module reference count is inadvertently incremented. 
+ * + * @sock: the accepting socket + * @newsock: reference to the new socket to be created + * @flags: socket flags + */ + +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags) +{ + struct sock *sk = sock->sk; + int ret; + + ret = sock_create_lite(sk->sk_family, sk->sk_type, + sk->sk_protocol, newsock); + if (ret < 0) + return ret; + + ret = accept(sock, *newsock, flags); + if (ret < 0) { + sock_release(*newsock); + return ret; + } + (*newsock)->ops = sock->ops; + return ret; +} + /** * release - destroy a TIPC socket * @sock: socket to destroy @@ -1529,7 +1608,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_create(sock_net(sock->sk), new_sock, 0, 0); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; @@ -1839,7 +1918,7 @@ static const struct proto_ops stream_ops = { static const struct net_proto_family tipc_family_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .create = tipc_create + .create = tipc_sk_create }; static struct proto tipc_proto = { @@ -1849,6 +1928,12 @@ static struct proto tipc_proto = { .sysctl_rmem = sysctl_tipc_rmem }; +static struct proto tipc_proto_kern = { + .name = "TIPC", + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem +}; + /** * tipc_socket_init - initialize TIPC socket interface * -- cgit v1.2.3 From 13a2e89873506d64d7e52f17b571da371a3e25a4 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:40 -0400 Subject: tipc: convert topology server to use new server facility As the new TIPC server infrastructure has been introduced, we can now convert the TIPC topology server to it. We get two benefits from doing this: 1) It simplifies the topology server locking policy. 
In the original locking policy, we placed one spin lock pointer in the tipc_subscriber structure to reuse the lock of the subscriber's server port, controlling access to the members of the tipc_subscriber instance. That is, we only used one lock to ensure both tipc_port and tipc_subscriber members were safely accessed. Now we introduce a separate spin lock in the tipc_subscriber structure that protects only the subscriber's own members, to get a finer-grained locking policy. Moreover, the change will allow us to make the topology server code more readable and maintainable. 2) It fixes a bug where sent subscription events may be lost when the topology port is congested. Using the new service, the topology server now queues sent events into an outgoing buffer, and then wakes up a sender process which has been blocked in workqueue context. The process will keep picking events from the buffer and sending them to their respective subscribers, using the kernel socket interface, until the buffer is empty. Even if the socket is congested during transmission there is no risk that events may be dropped, since the sender process may block when needed. Some minor reordering of initialization is done, since we now have a scenario where the topology server must be started after socket initialization has taken place, as the former depends on the latter. And overall, we see a simplification of the TIPC subscriber code in making this changeover. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/core.c | 6 +- net/tipc/socket.c | 3 +- net/tipc/subscr.c | 334 +++++++++++++++--------------------------------------- net/tipc/subscr.h | 8 +- 4 files changed, 104 insertions(+), 247 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index b0e42a08729..15bbe99b609 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -2,7 +2,7 @@ * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2006, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -136,8 +136,6 @@ static int tipc_core_start(void) res = tipc_ref_table_init(tipc_max_ports, tipc_random); if (!res) res = tipc_nametbl_init(); - if (!res) - res = tipc_subscr_start(); if (!res) res = tipc_cfg_init(); if (!res) @@ -146,6 +144,8 @@ static int tipc_core_start(void) res = tipc_socket_init(); if (!res) res = tipc_register_sysctl(); + if (!res) + res = tipc_subscr_start(); if (res) tipc_core_stop(); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index bd8e2cdecee..d0254157a30 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -402,7 +402,8 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) else if (addr->addrtype != TIPC_ADDR_NAMESEQ) return -EAFNOSUPPORT; - if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) + if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && + (addr->addr.nameseq.type != TIPC_TOP_SRV)) return -EACCES; return (addr->scope > 0) ? diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 6b42d47029a..f6be92a6973 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -2,7 +2,7 @@ * net/tipc/subscr.c: TIPC network topology service * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2007, 2010-2011, Wind River Systems + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,33 +41,42 @@ /** * struct tipc_subscriber - TIPC network topology subscriber - * @port_ref: object reference to server port connecting to subscriber - * @lock: pointer to spinlock controlling access to subscriber's server port - * @subscriber_list: adjacent subscribers in top. server's list of subscribers + * @conid: connection identifier to server connecting to subscriber + * @lock: controls access to subscriber * @subscription_list: list of subscription objects for this subscriber */ struct tipc_subscriber { - u32 port_ref; - spinlock_t *lock; - struct list_head subscriber_list; + int conid; + spinlock_t lock; struct list_head subscription_list; }; -/** - * struct top_srv - TIPC network topology subscription service - * @setup_port: reference to TIPC port that handles subscription requests - * @subscription_count: number of active subscriptions (not subscribers!) - * @subscriber_list: list of ports subscribing to service - * @lock: spinlock govering access to subscriber list - */ -struct top_srv { - u32 setup_port; - atomic_t subscription_count; - struct list_head subscriber_list; - spinlock_t lock; +static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len); +static void *subscr_named_msg_event(int conid); +static void subscr_conn_shutdown_event(int conid, void *usr_data); + +static atomic_t subscription_count = ATOMIC_INIT(0); + +static struct sockaddr_tipc topsrv_addr __read_mostly = { + .family = AF_TIPC, + .addrtype = TIPC_ADDR_NAMESEQ, + .addr.nameseq.type = TIPC_TOP_SRV, + .addr.nameseq.lower = TIPC_TOP_SRV, + .addr.nameseq.upper = TIPC_TOP_SRV, + .scope = TIPC_NODE_SCOPE }; -static struct top_srv topsrv; +static struct tipc_server topsrv __read_mostly = { + .saddr = &topsrv_addr, + .imp = TIPC_CRITICAL_IMPORTANCE, + .type = SOCK_SEQPACKET, + .max_rcvbuf_size = sizeof(struct tipc_subscr), + .name = "topology_server", + 
.tipc_conn_recvmsg = subscr_conn_msg_event, + .tipc_conn_new = subscr_named_msg_event, + .tipc_conn_shutdown = subscr_conn_shutdown_event, +}; /** * htohl - convert value to endianness used by destination @@ -81,20 +90,13 @@ static u32 htohl(u32 in, int swap) return swap ? swab32(in) : in; } -/** - * subscr_send_event - send a message containing a tipc_event to the subscriber - * - * Note: Must not hold subscriber's server port lock, since tipc_send() will - * try to take the lock if the message is rejected and returned! - */ -static void subscr_send_event(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, +static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, u32 node) { - struct iovec msg_sect; + struct tipc_subscriber *subscriber = sub->subscriber; + struct kvec msg_sect; + int ret; msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); @@ -104,7 +106,10 @@ static void subscr_send_event(struct tipc_subscription *sub, sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - tipc_send(sub->server_ref, 1, &msg_sect, msg_sect.iov_len); + ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); + if (ret < 0) + pr_err("Sending subscription event failed, no memory\n"); } /** @@ -147,21 +152,24 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub, subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } -/** - * subscr_timeout - subscription timeout has occurred - */ static void subscr_timeout(struct tipc_subscription *sub) { - struct tipc_port *server_port; + struct tipc_subscriber *subscriber = sub->subscriber; - /* Validate server port reference (in case subscriber is terminating) */ - server_port = tipc_port_lock(sub->server_ref); - if (server_port == NULL) + /* 
The spin lock per subscriber is used to protect its members */ + spin_lock_bh(&subscriber->lock); + + /* Validate if the connection related to the subscriber is + * closed (in case subscriber is terminating) + */ + if (subscriber->conid == 0) { + spin_unlock_bh(&subscriber->lock); return; + } /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { - tipc_port_unlock(server_port); + spin_unlock_bh(&subscriber->lock); return; } @@ -171,8 +179,7 @@ static void subscr_timeout(struct tipc_subscription *sub) /* Unlink subscription from subscriber */ list_del(&sub->subscription_list); - /* Release subscriber's server port */ - tipc_port_unlock(server_port); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -181,64 +188,54 @@ static void subscr_timeout(struct tipc_subscription *sub) /* Now destroy subscription */ k_term_timer(&sub->timer); kfree(sub); - atomic_dec(&topsrv.subscription_count); + atomic_dec(&subscription_count); } /** * subscr_del - delete a subscription within a subscription list * - * Called with subscriber port locked. + * Called with subscriber lock held. */ static void subscr_del(struct tipc_subscription *sub) { tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); kfree(sub); - atomic_dec(&topsrv.subscription_count); + atomic_dec(&subscription_count); } /** * subscr_terminate - terminate communication with a subscriber * - * Called with subscriber port locked. Routine must temporarily release lock - * to enable subscription timeout routine(s) to finish without deadlocking; - * the lock is then reclaimed to allow caller to release it upon return. - * (This should work even in the unlikely event some other thread creates - * a new object reference in the interim that uses this lock; this routine will - * simply wait for it to be released, then claim it.) 
+ * Note: Must call it in process context since it might sleep. */ static void subscr_terminate(struct tipc_subscriber *subscriber) { - u32 port_ref; + tipc_conn_terminate(&topsrv, subscriber->conid); +} + +static void subscr_release(struct tipc_subscriber *subscriber) +{ struct tipc_subscription *sub; struct tipc_subscription *sub_temp; - /* Invalidate subscriber reference */ - port_ref = subscriber->port_ref; - subscriber->port_ref = 0; - spin_unlock_bh(subscriber->lock); + spin_lock_bh(&subscriber->lock); - /* Sever connection to subscriber */ - tipc_shutdown(port_ref); - tipc_deleteport(port_ref); + /* Invalidate subscriber reference */ + subscriber->conid = 0; /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { if (sub->timeout != TIPC_WAIT_FOREVER) { + spin_unlock_bh(&subscriber->lock); k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); + spin_lock_bh(&subscriber->lock); } subscr_del(sub); } - - /* Remove subscriber from topology server's subscriber list */ - spin_lock_bh(&topsrv.lock); - list_del(&subscriber->subscriber_list); - spin_unlock_bh(&topsrv.lock); - - /* Reclaim subscriber lock */ - spin_lock_bh(subscriber->lock); + spin_unlock_bh(&subscriber->lock); /* Now destroy subscriber */ kfree(subscriber); @@ -247,7 +244,7 @@ static void subscr_terminate(struct tipc_subscriber *subscriber) /** * subscr_cancel - handle subscription cancellation request * - * Called with subscriber port locked. Routine must temporarily release lock + * Called with subscriber lock held. Routine must temporarily release lock * to enable the subscription timeout routine to finish without deadlocking; * the lock is then reclaimed to allow caller to release it upon return. 
* @@ -274,10 +271,10 @@ static void subscr_cancel(struct tipc_subscr *s, /* Cancel subscription timer (if used), then delete subscription */ if (sub->timeout != TIPC_WAIT_FOREVER) { sub->timeout = TIPC_WAIT_FOREVER; - spin_unlock_bh(subscriber->lock); + spin_unlock_bh(&subscriber->lock); k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); - spin_lock_bh(subscriber->lock); + spin_lock_bh(&subscriber->lock); } subscr_del(sub); } @@ -285,7 +282,7 @@ static void subscr_cancel(struct tipc_subscr *s, /** * subscr_subscribe - create subscription for subscriber * - * Called with subscriber port locked. + * Called with subscriber lock held. */ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, struct tipc_subscriber *subscriber) @@ -304,7 +301,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); subscr_terminate(subscriber); @@ -335,10 +332,10 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); - sub->server_ref = subscriber->port_ref; + sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&topsrv.subscription_count); + atomic_inc(&subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { k_init_timer(&sub->timer, (Handler)subscr_timeout, (unsigned long)sub); @@ -348,196 +345,51 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, return sub; } -/** - * subscr_conn_shutdown_event - handle termination request from subscriber - * - * Called with subscriber's server port unlocked. 
- */ -static void subscr_conn_shutdown_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size, - int reason) +/* Handle one termination request for the subscriber */ +static void subscr_conn_shutdown_event(int conid, void *usr_data) { - struct tipc_subscriber *subscriber = usr_handle; - spinlock_t *subscriber_lock; - - if (tipc_port_lock(port_ref) == NULL) - return; - - subscriber_lock = subscriber->lock; - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); + subscr_release((struct tipc_subscriber *)usr_data); } -/** - * subscr_conn_msg_event - handle new subscription request from subscriber - * - * Called with subscriber's server port unlocked. - */ -static void subscr_conn_msg_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - const unchar *data, - u32 size) +/* Handle one request to create a new subscription for the subscriber */ +static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len) { - struct tipc_subscriber *subscriber = usr_handle; - spinlock_t *subscriber_lock; + struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub; - /* - * Lock subscriber's server port (& make a local copy of lock pointer, - * in case subscriber is deleted while processing subscription request) - */ - if (tipc_port_lock(port_ref) == NULL) - return; - - subscriber_lock = subscriber->lock; - - if (size != sizeof(struct tipc_subscr)) { - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); - } else { - sub = subscr_subscribe((struct tipc_subscr *)data, subscriber); - spin_unlock_bh(subscriber_lock); - if (sub != NULL) { - - /* - * We must release the server port lock before adding a - * subscription to the name table since TIPC needs to be - * able to (re)acquire the port lock if an event message - * issued by the subscription process is rejected and - * returned. 
The subscription cannot be deleted while - * it is being added to the name table because: - * a) the single-threading of the native API port code - * ensures the subscription cannot be cancelled and - * the subscriber connection cannot be broken, and - * b) the name table lock ensures the subscription - * timeout code cannot delete the subscription, - * so the subscription object is still protected. - */ - tipc_nametbl_subscribe(sub); - } - } + spin_lock_bh(&subscriber->lock); + sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber); + if (sub) + tipc_nametbl_subscribe(sub); + spin_unlock_bh(&subscriber->lock); } -/** - * subscr_named_msg_event - handle request to establish a new subscriber - */ -static void subscr_named_msg_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - const unchar *data, - u32 size, - u32 importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest) + +/* Handle one request to establish a new subscriber */ +static void *subscr_named_msg_event(int conid) { struct tipc_subscriber *subscriber; - u32 server_port_ref; /* Create subscriber object */ subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); if (subscriber == NULL) { pr_warn("Subscriber rejected, no memory\n"); - return; + return NULL; } INIT_LIST_HEAD(&subscriber->subscription_list); - INIT_LIST_HEAD(&subscriber->subscriber_list); - - /* Create server port & establish connection to subscriber */ - tipc_createport(subscriber, - importance, - NULL, - NULL, - subscr_conn_shutdown_event, - NULL, - NULL, - subscr_conn_msg_event, - NULL, - &subscriber->port_ref); - if (subscriber->port_ref == 0) { - pr_warn("Subscriber rejected, unable to create port\n"); - kfree(subscriber); - return; - } - tipc_connect(subscriber->port_ref, orig); - - /* Lock server port (& save lock address for future use) */ - subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock; - - /* Add subscriber to topology server's subscriber list */ - 
spin_lock_bh(&topsrv.lock); - list_add(&subscriber->subscriber_list, &topsrv.subscriber_list); - spin_unlock_bh(&topsrv.lock); - - /* Unlock server port */ - server_port_ref = subscriber->port_ref; - spin_unlock_bh(subscriber->lock); - - /* Send an ACK- to complete connection handshaking */ - tipc_send(server_port_ref, 0, NULL, 0); + subscriber->conid = conid; + spin_lock_init(&subscriber->lock); - /* Handle optional subscription request */ - if (size != 0) { - subscr_conn_msg_event(subscriber, server_port_ref, - buf, data, size); - } + return (void *)subscriber; } int tipc_subscr_start(void) { - struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV}; - int res; - - spin_lock_init(&topsrv.lock); - INIT_LIST_HEAD(&topsrv.subscriber_list); - - res = tipc_createport(NULL, - TIPC_CRITICAL_IMPORTANCE, - NULL, - NULL, - NULL, - NULL, - subscr_named_msg_event, - NULL, - NULL, - &topsrv.setup_port); - if (res) - goto failed; - - res = tipc_publish(topsrv.setup_port, TIPC_NODE_SCOPE, &seq); - if (res) { - tipc_deleteport(topsrv.setup_port); - topsrv.setup_port = 0; - goto failed; - } - - return 0; - -failed: - pr_err("Failed to create subscription service\n"); - return res; + return tipc_server_start(&topsrv); } void tipc_subscr_stop(void) { - struct tipc_subscriber *subscriber; - struct tipc_subscriber *subscriber_temp; - spinlock_t *subscriber_lock; - - if (topsrv.setup_port) { - tipc_deleteport(topsrv.setup_port); - topsrv.setup_port = 0; - - list_for_each_entry_safe(subscriber, subscriber_temp, - &topsrv.subscriber_list, - subscriber_list) { - subscriber_lock = subscriber->lock; - spin_lock_bh(subscriber_lock); - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); - } - } + tipc_server_stop(&topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 218d2e07f0c..43e6d6332a0 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -2,7 +2,7 @@ * net/tipc/subscr.h: Include file for TIPC network topology service * * Copyright (c) 
2003-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2012-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,10 +37,14 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H +#include "server.h" + struct tipc_subscription; +struct tipc_subscriber; /** * struct tipc_subscription - TIPC network topology subscription object + * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription @@ -52,13 +56,13 @@ struct tipc_subscription; * @evt: template for events generated by subscription */ struct tipc_subscription { + struct tipc_subscriber *subscriber; struct tipc_name_seq seq; u32 timeout; u32 filter; struct timer_list timer; struct list_head nameseq_list; struct list_head subscription_list; - u32 server_ref; int swap; struct tipc_event evt; }; -- cgit v1.2.3 From 7d0ab17b74330e39a68ba33099ccda27f794f519 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:41 -0400 Subject: tipc: convert configuration server to use new server facility As the new socket-based TIPC server infrastructure has been introduced, we can now convert the configuration server to use it. Then we can take future steps to simplify the configuration server locking policy. Some minor reordering of initialization is done, due to the dependency on having tipc_socket_init completed. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/config.c | 102 ++++++++++++++++++++++++------------------------------ net/tipc/core.c | 4 +-- net/tipc/socket.c | 3 +- 3 files changed, 49 insertions(+), 60 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index f67866c765d..4887ae04f3a 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2012, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,12 +38,12 @@ #include "port.h" #include "name_table.h" #include "config.h" +#include "server.h" #define REPLY_TRUNCATED "\n" -static u32 config_port_ref; - static DEFINE_SPINLOCK(config_lock); +static struct tipc_server cfgsrv; static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ @@ -381,33 +381,27 @@ exit: return rep_tlv_buf; } -static void cfg_named_msg_event(void *userdata, - u32 port_ref, - struct sk_buff **buf, - const unchar *msg, - u32 size, - u32 importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest) +static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len) { struct tipc_cfg_msg_hdr *req_hdr; struct tipc_cfg_msg_hdr *rep_hdr; struct sk_buff *rep_buf; + int ret; /* Validate configuration message header (ignore invalid message) */ - req_hdr = (struct tipc_cfg_msg_hdr *)msg; - if ((size < sizeof(*req_hdr)) || - (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || + req_hdr = (struct tipc_cfg_msg_hdr *)buf; + if ((len < sizeof(*req_hdr)) || + (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { pr_warn("Invalid configuration message discarded\n"); return; } /* Generate reply for request (if can't, return 
request) */ - rep_buf = tipc_cfg_do_cmd(orig->node, - ntohs(req_hdr->tcm_type), - msg + sizeof(*req_hdr), - size - sizeof(*req_hdr), + rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type), + buf + sizeof(*req_hdr), + len - sizeof(*req_hdr), BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr)); if (rep_buf) { skb_push(rep_buf, sizeof(*rep_hdr)); @@ -415,57 +409,51 @@ static void cfg_named_msg_event(void *userdata, memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); rep_hdr->tcm_len = htonl(rep_buf->len); rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - } else { - rep_buf = *buf; - *buf = NULL; - } - /* NEED TO ADD CODE TO HANDLE FAILED SEND (SUCH AS CONGESTION) */ - tipc_send_buf2port(port_ref, orig, rep_buf, rep_buf->len); + ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, + rep_buf->len); + if (ret < 0) + pr_err("Sending cfg reply message failed, no memory\n"); + + kfree_skb(rep_buf); + } } +static struct sockaddr_tipc cfgsrv_addr __read_mostly = { + .family = AF_TIPC, + .addrtype = TIPC_ADDR_NAMESEQ, + .addr.nameseq.type = TIPC_CFG_SRV, + .addr.nameseq.lower = 0, + .addr.nameseq.upper = 0, + .scope = TIPC_ZONE_SCOPE +}; + +static struct tipc_server cfgsrv __read_mostly = { + .saddr = &cfgsrv_addr, + .imp = TIPC_CRITICAL_IMPORTANCE, + .type = SOCK_RDM, + .max_rcvbuf_size = 64 * 1024, + .name = "cfg_server", + .tipc_conn_recvmsg = cfg_conn_msg_event, + .tipc_conn_new = NULL, + .tipc_conn_shutdown = NULL +}; + int tipc_cfg_init(void) { - struct tipc_name_seq seq; - int res; - - res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE, - NULL, NULL, NULL, - NULL, cfg_named_msg_event, NULL, - NULL, &config_port_ref); - if (res) - goto failed; - - seq.type = TIPC_CFG_SRV; - seq.lower = seq.upper = tipc_own_addr; - res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); - if (res) - goto failed; - - return 0; - -failed: - pr_err("Unable to create configuration service\n"); - return res; + return tipc_server_start(&cfgsrv); } void tipc_cfg_reinit(void) { - 
struct tipc_name_seq seq; - int res; - - seq.type = TIPC_CFG_SRV; - seq.lower = seq.upper = 0; - tipc_withdraw(config_port_ref, TIPC_ZONE_SCOPE, &seq); + tipc_server_stop(&cfgsrv); - seq.lower = seq.upper = tipc_own_addr; - res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); - if (res) - pr_err("Unable to reinitialize configuration service\n"); + cfgsrv_addr.addr.nameseq.lower = tipc_own_addr; + cfgsrv_addr.addr.nameseq.upper = tipc_own_addr; + tipc_server_start(&cfgsrv); } void tipc_cfg_stop(void) { - tipc_deleteport(config_port_ref); - config_port_ref = 0; + tipc_server_stop(&cfgsrv); } diff --git a/net/tipc/core.c b/net/tipc/core.c index 15bbe99b609..fd4eeeaa972 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -136,8 +136,6 @@ static int tipc_core_start(void) res = tipc_ref_table_init(tipc_max_ports, tipc_random); if (!res) res = tipc_nametbl_init(); - if (!res) - res = tipc_cfg_init(); if (!res) res = tipc_netlink_start(); if (!res) @@ -146,6 +144,8 @@ static int tipc_core_start(void) res = tipc_register_sysctl(); if (!res) res = tipc_subscr_start(); + if (!res) + res = tipc_cfg_init(); if (res) tipc_core_stop(); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d0254157a30..9510fe8acf4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -403,7 +403,8 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) return -EAFNOSUPPORT; if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && - (addr->addr.nameseq.type != TIPC_TOP_SRV)) + (addr->addr.nameseq.type != TIPC_TOP_SRV) && + (addr->addr.nameseq.type != TIPC_CFG_SRV)) return -EACCES; return (addr->scope > 0) ? 
-- cgit v1.2.3 From 198d73b82bf78739f8f11cf7ff567a2e0da1dbef Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:42 -0400 Subject: tipc: delete code orphaned by new server infrastructure Having completed the conversion of the topology server and configuration server to use the new server infrastructure, the following functions become unused, and can be deleted: - tipc_createport() - port_wakeup_sh() - port_dispatcher() - port_dispatcher_sigh() - tipc_send_buf_fast() - tipc_send_buf2port Additionally, the following variables become orphaned, and can be deleted: - tipc_msg_err_event - tipc_named_msg_err_event - tipc_conn_shutdown_event - tipc_msg_event - tipc_named_msg_event - tipc_conn_msg_event - tipc_continue_event - msg_queue_head - msg_queue_tail - queue_lock Deletion is done here in a separate commit in order to allow the actual conversion changes to be more easily viewed. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 36 +------ net/tipc/port.c | 299 +------------------------------------------------------- net/tipc/port.h | 53 +--------- 3 files changed, 3 insertions(+), 385 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index a80feee5197..0a4c3a1bb9c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2,7 +2,7 @@ * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2007, 2012, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1056,40 +1056,6 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, return tipc_link_send_buf(l_ptr, buf); /* All other cases */ } -/* - * tipc_send_buf_fast: Entry for data messages where the - * destination node is known and the header is complete, - * inclusive total message length. 
- * Returns user data length. - */ -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) -{ - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - int res; - u32 selector = msg_origport(buf_msg(buf)) & 1; - u32 dummy; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(destnode); - if (likely(n_ptr)) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector]; - if (likely(l_ptr)) { - res = link_send_buf_fast(l_ptr, buf, &dummy); - tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); - return res; - } - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - res = msg_data_sz(buf_msg(buf)); - tipc_reject_msg(buf, TIPC_ERR_NO_NODE); - return res; -} - - /* * tipc_link_send_sections_fast: Entry for messages where the * destination processor is known and the header is complete, diff --git a/net/tipc/port.c b/net/tipc/port.c index 18098cac62f..0651522c943 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -2,7 +2,7 @@ * net/tipc/port.c: TIPC port code * * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2008, 2010-2011, Wind River Systems + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,11 +46,7 @@ #define MAX_REJECT_SIZE 1024 -static struct sk_buff *msg_queue_head; -static struct sk_buff *msg_queue_tail; - DEFINE_SPINLOCK(tipc_port_list_lock); -static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); @@ -668,215 +664,6 @@ void tipc_port_reinit(void) spin_unlock_bh(&tipc_port_list_lock); } - -/* - * port_dispatcher_sigh(): Signal handler for messages destinated - * to the tipc_port interface. 
- */ -static void port_dispatcher_sigh(void *dummy) -{ - struct sk_buff *buf; - - spin_lock_bh(&queue_lock); - buf = msg_queue_head; - msg_queue_head = NULL; - spin_unlock_bh(&queue_lock); - - while (buf) { - struct tipc_port *p_ptr; - struct user_port *up_ptr; - struct tipc_portid orig; - struct tipc_name_seq dseq; - void *usr_handle; - int connected; - int peer_invalid; - int published; - u32 message_type; - - struct sk_buff *next = buf->next; - struct tipc_msg *msg = buf_msg(buf); - u32 dref = msg_destport(msg); - - message_type = msg_type(msg); - if (message_type > TIPC_DIRECT_MSG) - goto reject; /* Unsupported message type */ - - p_ptr = tipc_port_lock(dref); - if (!p_ptr) - goto reject; /* Port deleted while msg in queue */ - - orig.ref = msg_origport(msg); - orig.node = msg_orignode(msg); - up_ptr = p_ptr->user_port; - usr_handle = up_ptr->usr_handle; - connected = p_ptr->connected; - peer_invalid = connected && !tipc_port_peer_msg(p_ptr, msg); - published = p_ptr->published; - - if (unlikely(msg_errcode(msg))) - goto err; - - switch (message_type) { - - case TIPC_CONN_MSG:{ - tipc_conn_msg_event cb = up_ptr->conn_msg_cb; - u32 dsz; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb)) - goto reject; - if (unlikely(!connected)) { - if (tipc_connect(dref, &orig)) - goto reject; - } else if (peer_invalid) - goto reject; - dsz = msg_data_sz(msg); - if (unlikely(dsz && - (++p_ptr->conn_unacked >= - TIPC_FLOW_CONTROL_WIN))) - tipc_acknowledge(dref, - p_ptr->conn_unacked); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), dsz); - break; - } - case TIPC_DIRECT_MSG:{ - tipc_msg_event cb = up_ptr->msg_cb; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb || connected)) - goto reject; - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_importance(msg), - &orig); - break; - } - case TIPC_MCAST_MSG: - case TIPC_NAMED_MSG:{ - tipc_named_msg_event cb = up_ptr->named_msg_cb; - - 
tipc_port_unlock(p_ptr); - if (unlikely(!cb || connected || !published)) - goto reject; - dseq.type = msg_nametype(msg); - dseq.lower = msg_nameinst(msg); - dseq.upper = (message_type == TIPC_NAMED_MSG) - ? dseq.lower : msg_nameupper(msg); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_importance(msg), - &orig, &dseq); - break; - } - } - if (buf) - kfree_skb(buf); - buf = next; - continue; -err: - switch (message_type) { - - case TIPC_CONN_MSG:{ - tipc_conn_shutdown_event cb = - up_ptr->conn_err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || !connected || peer_invalid) - break; - tipc_disconnect(dref); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg)); - break; - } - case TIPC_DIRECT_MSG:{ - tipc_msg_err_event cb = up_ptr->err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || connected) - break; - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg), &orig); - break; - } - case TIPC_MCAST_MSG: - case TIPC_NAMED_MSG:{ - tipc_named_msg_err_event cb = - up_ptr->named_err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || connected) - break; - dseq.type = msg_nametype(msg); - dseq.lower = msg_nameinst(msg); - dseq.upper = (message_type == TIPC_NAMED_MSG) - ? dseq.lower : msg_nameupper(msg); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg), &dseq); - break; - } - } - if (buf) - kfree_skb(buf); - buf = next; - continue; -reject: - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); - buf = next; - } -} - -/* - * port_dispatcher(): Dispatcher for messages destinated - * to the tipc_port interface. Called with port locked. 
- */ -static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf) -{ - buf->next = NULL; - spin_lock_bh(&queue_lock); - if (msg_queue_head) { - msg_queue_tail->next = buf; - msg_queue_tail = buf; - } else { - msg_queue_tail = msg_queue_head = buf; - tipc_k_signal((Handler)port_dispatcher_sigh, 0); - } - spin_unlock_bh(&queue_lock); - return 0; -} - -/* - * Wake up port after congestion: Called with port locked - */ -static void port_wakeup_sh(unsigned long ref) -{ - struct tipc_port *p_ptr; - struct user_port *up_ptr; - tipc_continue_event cb = NULL; - void *uh = NULL; - - p_ptr = tipc_port_lock(ref); - if (p_ptr) { - up_ptr = p_ptr->user_port; - if (up_ptr) { - cb = up_ptr->continue_event_cb; - uh = up_ptr->usr_handle; - } - tipc_port_unlock(p_ptr); - } - if (cb) - cb(uh, ref); -} - - -static void port_wakeup(struct tipc_port *p_ptr) -{ - tipc_k_signal((Handler)port_wakeup_sh, p_ptr->ref); -} - void tipc_acknowledge(u32 ref, u32 ack) { struct tipc_port *p_ptr; @@ -893,50 +680,6 @@ void tipc_acknowledge(u32 ref, u32 ack) tipc_net_route_msg(buf); } -/* - * tipc_createport(): user level call. 
- */ -int tipc_createport(void *usr_handle, - unsigned int importance, - tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, - tipc_msg_event msg_cb, - tipc_named_msg_event named_msg_cb, - tipc_conn_msg_event conn_msg_cb, - tipc_continue_event continue_event_cb, /* May be zero */ - u32 *portref) -{ - struct user_port *up_ptr; - struct tipc_port *p_ptr; - - up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); - if (!up_ptr) { - pr_warn("Port creation failed, no memory\n"); - return -ENOMEM; - } - p_ptr = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, - importance); - if (!p_ptr) { - kfree(up_ptr); - return -ENOMEM; - } - - p_ptr->user_port = up_ptr; - up_ptr->usr_handle = usr_handle; - up_ptr->ref = p_ptr->ref; - up_ptr->err_cb = error_cb; - up_ptr->named_err_cb = named_error_cb; - up_ptr->conn_err_cb = conn_error_cb; - up_ptr->msg_cb = msg_cb; - up_ptr->named_msg_cb = named_msg_cb; - up_ptr->conn_msg_cb = conn_msg_cb; - up_ptr->continue_event_cb = continue_event_cb; - *portref = p_ptr->ref; - tipc_port_unlock(p_ptr); - return 0; -} - int tipc_portimportance(u32 ref, unsigned int *importance) { struct tipc_port *p_ptr; @@ -1322,43 +1065,3 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, } return -ELINKCONG; } - -/** - * tipc_send_buf2port - send message buffer to port identity - */ -int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, - struct sk_buff *buf, unsigned int dsz) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg; - int res; - - p_ptr = (struct tipc_port *)tipc_ref_deref(ref); - if (!p_ptr || p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - msg_set_size(msg, BASIC_H_SIZE + dsz); - if (skb_cow(buf, BASIC_H_SIZE)) - return -ENOMEM; - - skb_push(buf, BASIC_H_SIZE); - skb_copy_to_linear_data(buf, msg, 
BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_recv_msg(buf); - else - res = tipc_send_buf_fast(buf, dest->node); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (port_unreliable(p_ptr)) - return dsz; - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index 2485649c408..7fd37c202ce 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -2,7 +2,7 @@ * net/tipc/port.h: Include file for TIPC port code * * Copyright (c) 1994-2007, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,37 +46,6 @@ #define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) -typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason, - struct tipc_portid const *attmpt_destid); - -typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason, - struct tipc_name_seq const *attmpt_dest); - -typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason); - -typedef void (*tipc_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, unsigned int importance, - struct tipc_portid const *origin); - -typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, unsigned int importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest); - -typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int 
size); - -typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); - /** * struct user_port - TIPC user port (used with native API) * @usr_handle: user-specified field @@ -87,13 +56,6 @@ typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); struct user_port { void *usr_handle; u32 ref; - tipc_msg_err_event err_cb; - tipc_named_msg_err_event named_err_cb; - tipc_conn_shutdown_event conn_err_cb; - tipc_msg_event msg_cb; - tipc_named_msg_event named_msg_cb; - tipc_conn_msg_event conn_msg_cb; - tipc_continue_event continue_event_cb; }; /** @@ -164,18 +126,8 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, int tipc_reject_msg(struct sk_buff *buf, u32 err); -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); - void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_createport(void *usr_handle, - unsigned int importance, tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb, - tipc_named_msg_event named_msg_cb, - tipc_conn_msg_event conn_msg_cb, - tipc_continue_event continue_event_cb, u32 *portref); - int tipc_deleteport(u32 portref); int tipc_portimportance(u32 portref, unsigned int *importance); @@ -222,9 +174,6 @@ int tipc_send2port(u32 portref, struct tipc_portid const *dest, unsigned int num_sect, struct iovec const *msg_sect, unsigned int total_len); -int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, - struct sk_buff *buf, unsigned int dsz); - int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, unsigned int section_count, struct iovec const *msg, unsigned int total_len); -- cgit v1.2.3 From f1733d7580ff94deb8ea071a293c23939ae0d450 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:43 -0400 Subject: tipc: remove user_port instance from tipc_port structure After the native API has been completely removed, the 'user_port' field in struct tipc_port becomes unused, and can be removed. 
As a consequence, the "usrmem" argument in tipc_msg_build() is no longer needed, and so we remove that one too. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 18 +++++++----------- net/tipc/msg.c | 15 +++++---------- net/tipc/msg.h | 4 ++-- net/tipc/port.c | 8 +++----- net/tipc/port.h | 14 -------------- 5 files changed, 17 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 0a4c3a1bb9c..d34429d03c1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1081,7 +1081,7 @@ again: * (Must not hold any locks while building message.) */ res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, - sender->max_pkt, !sender->user_port, &buf); + sender->max_pkt, &buf); read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); @@ -1216,18 +1216,14 @@ again: else sz = fragm_rest; - if (likely(!sender->user_port)) { - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { + if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { error: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } - return -EFAULT; + for (; buf_chain; buf_chain = buf) { + buf = buf_chain->next; + kfree_skb(buf_chain); } - } else - skb_copy_to_linear_data_offset(buf, fragm_crs, - sect_crs, sz); + return -EFAULT; + } sect_crs += sz; sect_rest -= sz; fragm_crs += sz; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index f2db8a87d9c..c2a26132251 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -73,8 +73,8 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, * Returns message data size or errno */ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - int max_size, int usrmem, struct sk_buff **buf) + u32 num_sect, unsigned int total_len, int max_size, + struct sk_buff **buf) { int dsz, sz, hsz, pos, res, cnt; @@ -92,14 +92,9 @@ int tipc_msg_build(struct 
tipc_msg *hdr, struct iovec const *msg_sect, return -ENOMEM; skb_copy_to_linear_data(*buf, hdr, hsz); for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { - if (likely(usrmem)) - res = !copy_from_user((*buf)->data + pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - else - skb_copy_to_linear_data_offset(*buf, pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); + skb_copy_to_linear_data_offset(*buf, pos, + msg_sect[cnt].iov_base, + msg_sect[cnt].iov_len); pos += msg_sect[cnt].iov_len; } if (likely(res)) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ba2a72beea6..511019a77c9 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -722,6 +722,6 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m); void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - int max_size, int usrmem, struct sk_buff **buf); + u32 num_sect, unsigned int total_len, int max_size, + struct sk_buff **buf); #endif diff --git a/net/tipc/port.c b/net/tipc/port.c index 0651522c943..f628c84a8f6 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -115,7 +115,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, msg_set_nameupper(hdr, seq->upper); msg_set_hdr_sz(hdr, MCAST_H_SIZE); res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - !oport->user_port, &buf); + &buf); if (unlikely(!buf)) return res; @@ -234,7 +234,6 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->dispatcher = dispatcher; p_ptr->wakeup = wakeup; - p_ptr->user_port = NULL; k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref); INIT_LIST_HEAD(&p_ptr->publications); INIT_LIST_HEAD(&p_ptr->port_list); @@ -271,7 +270,6 @@ int tipc_deleteport(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); } - kfree(p_ptr->user_port); 
spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); @@ -444,7 +442,7 @@ int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, int res; res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - !p_ptr->user_port, &buf); + &buf); if (!buf) return res; @@ -927,7 +925,7 @@ static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_se int res; res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len, - MAX_MSG_SIZE, !sender->user_port, &buf); + MAX_MSG_SIZE, &buf); if (likely(buf)) tipc_port_recv_msg(buf); return res; diff --git a/net/tipc/port.h b/net/tipc/port.h index 7fd37c202ce..4779f0a8223 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -46,18 +46,6 @@ #define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) -/** - * struct user_port - TIPC user port (used with native API) - * @usr_handle: user-specified field - * @ref: object reference to associated TIPC port - * - * - */ -struct user_port { - void *usr_handle; - u32 ref; -}; - /** * struct tipc_port - TIPC port structure * @usr_handle: pointer to additional user-defined information about port @@ -74,7 +62,6 @@ struct user_port { * @port_list: adjacent ports in TIPC's global list of ports * @dispatcher: ptr to routine which handles received messages * @wakeup: ptr to routine to call when port is no longer congested - * @user_port: ptr to user port associated with port (if any) * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: * @sent: # of non-empty messages sent by port @@ -101,7 +88,6 @@ struct tipc_port { struct list_head port_list; u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); void (*wakeup)(struct tipc_port *); - struct user_port *user_port; struct list_head wait_list; u32 waiting_pkts; u32 sent; -- cgit v1.2.3 From 3c5db8e4eca36e4f312b49bba99f4c1f6ce0563a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:44 
-0400 Subject: tipc: rename tipc_createport_raw to tipc_createport After the removal of the native API, there is now only one way to create a TIPC port instance -- the function tipc_createport_raw(). We make it more readable by renaming it to tipc_createport(). Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/port.c | 4 ++-- net/tipc/port.h | 2 +- net/tipc/socket.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/port.c b/net/tipc/port.c index f628c84a8f6..84b2a574f16 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -202,11 +202,11 @@ exit: } /** - * tipc_createport_raw - create a generic TIPC port + * tipc_createport - create a generic TIPC port * * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ -struct tipc_port *tipc_createport_raw(void *usr_handle, +struct tipc_port *tipc_createport(void *usr_handle, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance) diff --git a/net/tipc/port.h b/net/tipc/port.h index 4779f0a8223..45838826f2f 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -106,7 +106,7 @@ struct tipc_port_list; /* * TIPC port manipulation routines */ -struct tipc_port *tipc_createport_raw(void *usr_handle, +struct tipc_port *tipc_createport(void *usr_handle, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9510fe8acf4..67f4e1fbf5a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -195,8 +195,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; /* Allocate TIPC port for socket to use */ - tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, - TIPC_LOW_IMPORTANCE); + tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch, + TIPC_LOW_IMPORTANCE); if
(unlikely(!tp_ptr)) { sk_free(sk); return -ENOMEM; -- cgit v1.2.3 From 28e5297281ab85d636aa814a9b65cfb99375d092 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:45 -0400 Subject: tipc: convert config_lock from spinlock to mutex As the configuration server is now running under process context, it's unnecessary for us to have a spinlock serializing the TIPC configuration process. Instead, we replace it with a mutex lock, which gives us more freedom. For instance, we can now call pre-emptable functions within the protected area. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/config.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/tipc/config.c b/net/tipc/config.c index 4887ae04f3a..c301a9a592d 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -42,7 +42,7 @@ #define REPLY_TRUNCATED "\n" -static DEFINE_SPINLOCK(config_lock); +static DEFINE_MUTEX(config_mutex); static struct tipc_server cfgsrv; static const void *req_tlv_area; /* request message TLV area */ @@ -181,18 +181,7 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - - /* - * Must temporarily release configuration spinlock while switching into - * networking mode as it calls tipc_eth_media_start(), which may sleep. - * Releasing the lock is harmless as other locally-issued configuration - * commands won't occur until this one completes, and remotely-issued - * configuration commands can't be received until a local configuration - * command to enable the first bearer is received and processed. 
- */ - spin_unlock_bh(&config_lock); tipc_core_start_net(addr); - spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); } @@ -248,7 +237,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area { struct sk_buff *rep_tlv_buf; - spin_lock_bh(&config_lock); + mutex_lock(&config_mutex); /* Save request and reply details in a well-known location */ req_tlv_area = request_area; @@ -377,7 +366,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Return reply buffer */ exit: - spin_unlock_bh(&config_lock); + mutex_unlock(&config_mutex); return rep_tlv_buf; } -- cgit v1.2.3 From c0fee8aca7206264d5e3dcc4e60aaf86501f4ea1 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:46 -0400 Subject: tipc: save sock structure pointer instead of void pointer to tipc_port Directly save sock structure pointer instead of void pointer to avoid unnecessary cast conversions. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/port.c | 4 ++-- net/tipc/port.h | 6 +++--- net/tipc/socket.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/port.c b/net/tipc/port.c index 84b2a574f16..0bb185a3ed4 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -206,7 +206,7 @@ exit: * * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ -struct tipc_port *tipc_createport(void *usr_handle, +struct tipc_port *tipc_createport(struct sock *sk, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance) @@ -227,7 +227,7 @@ struct tipc_port *tipc_createport(void *usr_handle, return NULL; } - p_ptr->usr_handle = usr_handle; + p_ptr->sk = sk; p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->ref = ref; INIT_LIST_HEAD(&p_ptr->wait_list); diff --git a/net/tipc/port.h b/net/tipc/port.h index 45838826f2f..241f529db94 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -48,7 +48,7 @@ /** * struct tipc_port - TIPC port structure - * @usr_handle: pointer to additional user-defined information about port + * @sk: pointer to socket handle * @lock: pointer to spinlock for controlling access to port * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established @@ -74,7 +74,7 @@ * @subscription: "node down" subscription used to terminate failed connections */ struct tipc_port { - void *usr_handle; + struct sock *sk; spinlock_t *lock; int connected; u32 conn_type; @@ -106,7 +106,7 @@ struct tipc_port_list; /* * TIPC port manipulation routines */ -struct tipc_port *tipc_createport(void *usr_handle, +struct tipc_port *tipc_createport(struct sock *sk, u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), void (*wakeup)(struct tipc_port *), const u32 importance); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 67f4e1fbf5a..14ed54e961b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1406,7 +1406,7 @@ 
static int backlog_rcv(struct sock *sk, struct sk_buff *buf) */ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) { - struct sock *sk = (struct sock *)tport->usr_handle; + struct sock *sk = tport->sk; u32 res; /* @@ -1437,7 +1437,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) */ static void wakeupdispatch(struct tipc_port *tport) { - struct sock *sk = (struct sock *)tport->usr_handle; + struct sock *sk = tport->sk; sk->sk_write_space(sk); } -- cgit v1.2.3 From ae8509c420122866344bde1241e31858d0aa2fbc Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 17 Jun 2013 10:54:47 -0400 Subject: tipc: cosmetic realignment of function arguments No runtime code changes here. Just a realign of the function arguments to start where the 1st one was, and fit as many args as can be put in an 80 char line. Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/bcast.c | 3 +-- net/tipc/bcast.h | 3 ++- net/tipc/discover.c | 7 +++---- net/tipc/eth_media.c | 2 +- net/tipc/link.c | 18 +++++++----------- net/tipc/msg.c | 4 ++-- net/tipc/msg.h | 4 ++-- net/tipc/name_table.c | 10 +++++----- net/tipc/name_table.h | 11 ++++++----- net/tipc/node_subscr.c | 2 +- net/tipc/port.c | 7 ++++--- net/tipc/port.h | 10 ++++++---- net/tipc/socket.c | 12 ++++++------ net/tipc/subscr.c | 14 ++++---------- net/tipc/subscr.h | 13 ++++--------- 15 files changed, 54 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e5f3da50782..716de1ac6cb 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -578,8 +578,7 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all circumstances, * since the broadcast link's pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, - struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int 
bp_index; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a93306557e0..6ee587b469f 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -75,7 +75,8 @@ void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); /** * tipc_nmap_equal - test for equality of node maps */ -static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b) +static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b) { return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index eedff58d038..ecc758c6eac 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -70,8 +70,7 @@ struct tipc_link_req { * @dest_domain: network domain of node(s) which should respond to message * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, - u32 dest_domain, +static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, struct tipc_bearer *b_ptr) { struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); @@ -346,8 +345,8 @@ exit: * * Returns 0 if successful, otherwise -errno. 
*/ -int tipc_disc_create(struct tipc_bearer *b_ptr, - struct tipc_media_addr *dest, u32 dest_domain) +int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, + u32 dest_domain) { struct tipc_link_req *req; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index fc60bea6316..c1aa37fdca2 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -62,7 +62,7 @@ static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; static int eth_started; static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv); + void *dv); /* * Network device notifier info */ diff --git a/net/tipc/link.c b/net/tipc/link.c index d34429d03c1..b852c94a784 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -771,8 +771,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) * link_bundle_buf(): Append contents of a buffer to * the tail of an existing one. */ -static int link_bundle_buf(struct tipc_link *l_ptr, - struct sk_buff *bundler, +static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler, struct sk_buff *buf) { struct tipc_msg *bundler_msg = buf_msg(bundler); @@ -1064,8 +1063,7 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, */ int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, - const u32 num_sect, - unsigned int total_len, + const u32 num_sect, unsigned int total_len, u32 destaddr) { struct tipc_msg *hdr = &sender->phdr; @@ -1155,8 +1153,7 @@ exit: */ static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, - u32 num_sect, - unsigned int total_len, + u32 num_sect, unsigned int total_len, u32 destaddr) { struct tipc_link *l_ptr; @@ -1408,7 +1405,7 @@ static void link_reset_all(unsigned long addr) } static void link_retransmit_failure(struct tipc_link *l_ptr, - struct sk_buff *buf) + struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -1863,8 +1860,8 @@ static void 
link_handle_out_of_seq_msg(struct tipc_link *l_ptr, * Send protocol message to the other endpoint. */ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, - int probe_msg, u32 gap, u32 tolerance, - u32 priority, u32 ack_mtu) + int probe_msg, u32 gap, u32 tolerance, + u32 priority, u32 ack_mtu) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; @@ -2107,8 +2104,7 @@ exit: * another bearer. Owner node is locked. */ static void tipc_link_tunnel(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, - struct tipc_msg *msg, + struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, u32 selector) { struct tipc_link *tunnel; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index c2a26132251..ced60e2fc4f 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -51,8 +51,8 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m) } -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode) +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, + u32 destnode) { memset(m, 0, hsize); msg_set_version(m); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 511019a77c9..5e4ccf5c27d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -719,8 +719,8 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) } u32 tipc_msg_tot_importance(struct tipc_msg *m); -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode); +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, + u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, unsigned int total_len, int max_size, struct sk_buff **buf); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 24b16791431..09dcd54b04e 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -440,7 +440,7 @@ found: * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s) + struct tipc_subscription 
*s) { struct sub_seq *sseq = nseq->sseqs; @@ -662,7 +662,7 @@ exit: * tipc_nametbl_publish - add name publication to network name tables */ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) + u32 scope, u32 port_ref, u32 key) { struct publication *publ; @@ -753,7 +753,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) * subseq_list - print specified sub-sequence contents into the given buffer */ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, - u32 index) + u32 index) { char portIdStr[27]; const char *scope_str[] = {"", " zone", " cluster", " node"}; @@ -792,7 +792,7 @@ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, * nameseq_list - print specified name sequence contents into the given buffer */ static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, - u32 type, u32 lowbound, u32 upbound, u32 index) + u32 type, u32 lowbound, u32 upbound, u32 index) { struct sub_seq *sseq; char typearea[11]; @@ -849,7 +849,7 @@ static int nametbl_header(char *buf, int len, u32 depth) * nametbl_list - print specified name table contents into the given buffer */ static int nametbl_list(char *buf, int len, u32 depth_info, - u32 type, u32 lowbound, u32 upbound) + u32 type, u32 lowbound, u32 upbound) { struct hlist_head *seq_head; struct name_seq *seq; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 71cb4dc712d..f02f48b9a21 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -87,14 +87,15 @@ extern rwlock_t tipc_nametbl_lock; struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); + struct tipc_port_list *dports); struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 
key); + u32 scope, u32 port_ref, u32 key); int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key); + u32 scope, u32 node, u32 ref, + u32 key); +struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, + u32 ref, u32 key); void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(void); diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 5e34b015da4..8a7384c04ad 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -42,7 +42,7 @@ * tipc_nodesub_subscribe - create "node down" subscription for specified node */ void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, - void *usr_handle, net_ev_handler handle_down) + void *usr_handle, net_ev_handler handle_down) { if (in_own_node(addr)) { node_sub->node = NULL; diff --git a/net/tipc/port.c b/net/tipc/port.c index 0bb185a3ed4..b3ed2fcab4f 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -207,9 +207,10 @@ exit: * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ struct tipc_port *tipc_createport(struct sock *sk, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance) + u32 (*dispatcher)(struct tipc_port *, + struct sk_buff *), + void (*wakeup)(struct tipc_port *), + const u32 importance) { struct tipc_port *p_ptr; struct tipc_msg *msg; diff --git a/net/tipc/port.h b/net/tipc/port.h index 241f529db94..5a7026b9c34 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -107,8 +107,10 @@ struct tipc_port_list; * TIPC port manipulation routines */ struct tipc_port *tipc_createport(struct sock *sk, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void 
(*wakeup)(struct tipc_port *), const u32 importance); + u32 (*dispatcher)(struct tipc_port *, + struct sk_buff *), + void (*wakeup)(struct tipc_port *), + const u32 importance); int tipc_reject_msg(struct sk_buff *buf, u32 err); @@ -126,9 +128,9 @@ int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); int tipc_publish(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); + struct tipc_name_seq const *name_seq); int tipc_withdraw(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); + struct tipc_name_seq const *name_seq); int tipc_connect(u32 portref, struct tipc_portid const *port); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 14ed54e961b..ce8249c7682 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -278,7 +278,7 @@ void tipc_sock_release_local(struct socket *sock) */ int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags) + int flags) { struct sock *sk = sock->sk; int ret; @@ -889,7 +889,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) * Returns 0 if successful, otherwise errno */ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, - struct tipc_port *tport) + struct tipc_port *tport) { u32 anc_data[3]; u32 err; @@ -1736,8 +1736,8 @@ restart: * * Returns 0 on success, errno otherwise */ -static int setsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, unsigned int ol) +static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, + unsigned int ol) { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); @@ -1795,8 +1795,8 @@ static int setsockopt(struct socket *sock, * * Returns 0 on success, errno otherwise */ -static int getsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, int __user *ol) +static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, + int __user *ol) { 
struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index f6be92a6973..d38bb45d82e 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -117,10 +117,8 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscr_overlap(struct tipc_subscription *sub, - u32 found_lower, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper) - { if (found_lower < sub->seq.lower) found_lower = sub->seq.lower; @@ -136,13 +134,9 @@ int tipc_subscr_overlap(struct tipc_subscription *sub, * * Protected by nameseq.lock in name_table.c */ -void tipc_subscr_report_overlap(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, - u32 node, - int must) +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must) { if (!tipc_subscr_overlap(sub, found_lower, found_upper)) return; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 43e6d6332a0..393e417bee3 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -67,17 +67,12 @@ struct tipc_subscription { struct tipc_event evt; }; -int tipc_subscr_overlap(struct tipc_subscription *sub, - u32 found_lower, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); -void tipc_subscr_report_overlap(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, - u32 node, - int must_report); +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must); int tipc_subscr_start(void); -- cgit v1.2.3 From 796c75d0d3ef13cd1df00779abb8b27edb630504 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:48 -0400 Subject: tipc: enhance priority of link protocol packet 
pfifo_fast is set as the default traffic class queueing discipline. This queue has three so-called "bands". Within each band, FIFO rules apply. However, as long as there are packets waiting in band 0, band 1 won't be processed. Currently the priorities of all kinds of TIPC packets are never set, that is, their priorities are 0, so they are mapped to band 1 of the pfifo_fast qdisc. But, especially during link congestion, if link protocol packets can be sent out earlier than other types of packets so that they arrive at the peer endpoint in time, the peer will reset its link timeout timer in time and keep the link alive. So raising the priority of link protocol packets avoids unnecessary link resets due to transient link congestion. Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/link.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b852c94a784..b6de1aa059f 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -41,6 +41,8 @@ #include "discover.h" #include "config.h" +#include <linux/pkt_sched.h> + /* * Error message prefixes */ @@ -1947,6 +1949,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, return; skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); + buf->priority = TC_PRIO_CONTROL; /* Defer message if bearer is already blocked */ if (tipc_bearer_blocked(l_ptr->b_ptr)) { -- cgit v1.2.3 From 7410f967ba9bdc14b1e336e5d235929ed878cbfc Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:49 -0400 Subject: tipc: make tipc_link_send_sections_fast exit earlier Once message build request function returns invalid code, the process of sending message cannot continue. So in case of message build failure, tipc_link_send_sections_fast() should return immediately. Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/link.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b6de1aa059f..b6ffa9fab24 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1082,6 +1082,9 @@ again: */ res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, sender->max_pkt, &buf); + /* Exit if build request was invalid */ + if (unlikely(res < 0)) + return res; read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); @@ -1098,10 +1101,6 @@ exit: return res; } - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - goto exit; - /* Exit if link (or bearer) is congested */ if (link_congested(l_ptr) || tipc_bearer_blocked(l_ptr->b_ptr)) { -- cgit v1.2.3 From 126c0524648631a0f6fba4d016586b236209fe6f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:50 -0400 Subject: tipc: fix wrong return value for link_send_sections_long routine When skb buffer cannot be allocated in link_send_sections_long(), -ENOMEM error code instead of -EFAULT should be returned to its caller. Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/link.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b6ffa9fab24..0cc3d9015c5 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1168,6 +1168,7 @@ static int link_send_sections_long(struct tipc_port *sender, const unchar *sect_crs; int curr_sect; u32 fragm_no; + int res = 0; again: fragm_no = 1; @@ -1215,12 +1216,13 @@ again: sz = fragm_rest; if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { + res = -EFAULT; error: for (; buf_chain; buf_chain = buf) { buf = buf_chain->next; kfree_skb(buf_chain); } - return -EFAULT; + return res; } sect_crs += sz; sect_rest -= sz; @@ -1241,8 +1243,10 @@ error: msg_set_fragm_no(&fragm_hdr, ++fragm_no); prev = buf; buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) + if (!buf) { + res = -ENOMEM; goto error; + } buf->next = NULL; prev->next = buf; -- cgit v1.2.3 From 2537af9dcabbdd6c93c041a955d3a9ae42c0c008 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 17 Jun 2013 10:54:51 -0400 Subject: tipc: remove dev_base_lock use from enable_bearer Convert enable_bearer() to RCU locking with dev_get_by_name(). Based on a similar changeset in commit 840a185d ["aoe: remove dev_base_lock use from aoecmd_cfg_pkts()"] -- quoting that: "dev_base_lock is the legacy way to lock the device list, and is planned to disappear. (writers hold RTNL, readers hold RCU lock)" Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- net/tipc/eth_media.c | 13 ++----------- net/tipc/ib_media.c | 13 ++----------- 2 files changed, 4 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index c1aa37fdca2..40ea40cf620 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -162,8 +162,7 @@ static void setup_bearer(struct work_struct *work) */ static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = NULL; - struct net_device *pdev = NULL; + struct net_device *dev; struct eth_bearer *eb_ptr = &eth_bearers[0]; struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; @@ -178,15 +177,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /* Find device with specified name */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, pdev) { - if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { - dev = pdev; - dev_hold(dev); - break; - } - } - read_unlock(&dev_base_lock); + dev = dev_get_by_name(&init_net, driver_name); if (!dev) return -ENODEV; diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index baa9df4327d..ad2e1ec4117 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -155,8 +155,7 @@ static void setup_bearer(struct work_struct *work) */ static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = NULL; - struct net_device *pdev = NULL; + struct net_device *dev; struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; @@ -171,15 +170,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /* Find device with specified name */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, pdev) { - if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { - dev = pdev; - dev_hold(dev); - break; - } - } - read_unlock(&dev_base_lock); + dev = dev_get_by_name(&init_net, driver_name); if (!dev) return -ENODEV; -- 
cgit v1.2.3 From 939cfa75a0cea97aa60cb88e3722baefdceb4e72 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 17 Jun 2013 11:40:04 +0200 Subject: net: sctp: get rid of t_new macro for kzalloc t_new rather obfuscates things where everyone else is using actual function names instead of that macro, so replace it with kzalloc, which is the function t_new wraps. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 +- net/sctp/bind_addr.c | 2 +- net/sctp/endpointola.c | 3 ++- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/sctp/transport.c | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 756025c98e8..bf6e6bd553c 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -355,7 +355,7 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, { struct sctp_association *asoc; - asoc = t_new(struct sctp_association, gfp); + asoc = kzalloc(sizeof(*asoc), gfp); if (!asoc) goto fail; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 41145fe3181..64977ea0f9c 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -162,7 +162,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, struct sctp_sockaddr_entry *addr; /* Add the address to the bind address list. */ - addr = t_new(struct sctp_sockaddr_entry, gfp); + addr = kzalloc(sizeof(*addr), gfp); if (!addr) return -ENOMEM; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 5fbd7bc6bb1..a8b26741c0a 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -192,9 +192,10 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) struct sctp_endpoint *ep; /* Build a local endpoint. 
*/ - ep = t_new(struct sctp_endpoint, gfp); + ep = kzalloc(sizeof(*ep), gfp); if (!ep) goto fail; + if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fffc7b62a9a..4f3e13b31fc 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -402,7 +402,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. */ - addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index eaee00c6113..fad7d1b67be 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -153,7 +153,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { /* Add the address to the local list. */ - addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 098f1d5f769..5d3c71bbd19 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -116,7 +116,7 @@ struct sctp_transport *sctp_transport_new(struct net *net, { struct sctp_transport *transport; - transport = t_new(struct sctp_transport, gfp); + transport = kzalloc(sizeof(*transport), gfp); if (!transport) goto fail; -- cgit v1.2.3 From dda9192851dcf904b4d1095480834f2a4f814ae3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 17 Jun 2013 11:40:05 +0200 Subject: net: sctp: remove SCTP_STATIC macro SCTP_STATIC is just another define for the static keyword. It's use is inconsistent in the SCTP code anyway and it was introduced in the initial implementation of SCTP in 2.5. 
We have a regression suite in lksctp-tools, but this is for user space only, so no one makes use of this macro anymore. The kernel test suite for 2.5 is incompatible with the current SCTP code anyway. So simply remove it, to be more consistent with the rest of the kernel code. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/chunk.c | 2 +- net/sctp/input.c | 4 +-- net/sctp/ipv6.c | 4 +-- net/sctp/protocol.c | 4 +-- net/sctp/sm_make_chunk.c | 10 +++---- net/sctp/socket.c | 78 ++++++++++++++++++++++-------------------------- net/sctp/tsnmap.c | 10 +++---- net/sctp/ulpevent.c | 10 +++---- 8 files changed, 57 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 69ce21e3716..7135fc0c087 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -66,7 +66,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) } /* Allocate and initialize datamsg. */ -SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) +static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) { struct sctp_datamsg *msg; msg = kmalloc(sizeof(struct sctp_datamsg), gfp); diff --git a/net/sctp/input.c b/net/sctp/input.c index 6533d81a638..4cfc74699a3 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -903,11 +903,11 @@ hit: } /* Look up an association. BH-safe. */ -SCTP_STATIC +static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp) { struct sctp_association *asoc; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 4f3e13b31fc..adeaa0e64f5 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -145,8 +145,8 @@ static struct notifier_block sctp_inet6addr_notifier = { }; /* ICMP error handler. 
*/ -SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct inet6_dev *idev; struct sock *sk; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index fad7d1b67be..57b568c38ef 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1312,7 +1312,7 @@ static struct pernet_operations sctp_net_ops = { }; /* Initialize the universe into something sensible. */ -SCTP_STATIC __init int sctp_init(void) +static __init int sctp_init(void) { int i; int status = -EINVAL; @@ -1499,7 +1499,7 @@ err_chunk_cachep: } /* Exit handler for the SCTP protocol. */ -SCTP_STATIC __exit void sctp_exit(void) +static __exit void sctp_exit(void) { /* BUG. This should probably do something useful like clean * up all the remaining associations and all that memory. diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index cf579e71cff..fc8548743ed 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -68,9 +68,8 @@ #include #include -SCTP_STATIC -struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen); +static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, + __u8 type, __u8 flags, int paylen); static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, @@ -1353,9 +1352,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk) /* Create a new chunk, setting the type and flags headers from the * arguments, reserving enough space for a 'paylen' byte payload. 
*/ -SCTP_STATIC -struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen) +static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, + __u8 type, __u8 flags, int paylen) { struct sctp_chunk *retval; sctp_chunkhdr_t *chunk_hdr; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 510dc79a32a..75fe92ac2e9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -84,11 +84,6 @@ #include #include -/* WARNING: Please do not remove the SCTP_STATIC attribute to - * any of the functions below as they are used to export functions - * used by a project regression testsuite. - */ - /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); @@ -279,7 +274,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, * sockaddr_in6 [RFC 2553]), * addr_len - the size of the address structure. */ -SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) +static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) { int retval = 0; @@ -333,7 +328,7 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, } /* Bind a local address either to an endpoint or to an association. */ -SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) +static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) { struct net *net = sock_net(sk); struct sctp_sock *sp = sctp_sk(sk); @@ -964,9 +959,9 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * * Returns 0 if ok, <0 errno code on error. 
*/ -SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size, int op) +static int sctp_setsockopt_bindx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size, int op) { struct sockaddr *kaddrs; int err; @@ -1312,7 +1307,7 @@ out_free: * * Returns >=0 if ok, <0 errno code on error. */ -SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, +static int __sctp_setsockopt_connectx(struct sock* sk, struct sockaddr __user *addrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1350,9 +1345,9 @@ SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, * This is an older interface. It's kept for backward compatibility * to the option that doesn't provide association id. */ -SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size) +static int sctp_setsockopt_connectx_old(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) { return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL); } @@ -1363,9 +1358,9 @@ SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, * indication to the call. Error is always negative and association id is * always positive. */ -SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size) +static int sctp_setsockopt_connectx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) { sctp_assoc_t assoc_id = 0; int err = 0; @@ -1386,9 +1381,9 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, * addrs_num structure member. That way we can re-use the existing * code. 
*/ -SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, - char __user *optval, - int __user *optlen) +static int sctp_getsockopt_connectx3(struct sock* sk, int len, + char __user *optval, + int __user *optlen) { struct sctp_getaddrs_old param; sctp_assoc_t assoc_id = 0; @@ -1464,7 +1459,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, * shutdown phase does not finish during this period, close() will * return but the graceful shutdown phase continues in the system. */ -SCTP_STATIC void sctp_close(struct sock *sk, long timeout) +static void sctp_close(struct sock *sk, long timeout) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; @@ -1573,10 +1568,10 @@ static int sctp_error(struct sock *sk, int flags, int err) */ /* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */ -SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); +static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); -SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t msg_len) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -2034,9 +2029,9 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) */ static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); -SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); @@ -3565,8 +3560,8 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, * optval - the buffer to store the value of the option. * optlen - the size of the buffer. 
*/ -SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +static int sctp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) { int retval = 0; @@ -3725,8 +3720,8 @@ out_nounlock: * * len: the size of the address. */ -SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) +static int sctp_connect(struct sock *sk, struct sockaddr *addr, + int addr_len) { int err = 0; struct sctp_af *af; @@ -3752,7 +3747,7 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* FIXME: Write comments. */ -SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) +static int sctp_disconnect(struct sock *sk, int flags) { return -EOPNOTSUPP; /* STUB */ } @@ -3764,7 +3759,7 @@ SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. */ -SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) +static struct sock *sctp_accept(struct sock *sk, int flags, int *err) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -3817,7 +3812,7 @@ out: } /* The SCTP ioctl handler. */ -SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) { int rc = -ENOTCONN; @@ -3859,7 +3854,7 @@ out: * initialized the SCTP-specific portion of the sock. * The sock structure should already be zero-filled memory. */ -SCTP_STATIC int sctp_init_sock(struct sock *sk) +static int sctp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -3993,7 +3988,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) } /* Cleanup any SCTP per socket resources. 
*/ -SCTP_STATIC void sctp_destroy_sock(struct sock *sk) +static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; @@ -4028,7 +4023,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) * Disables further send and receive operations * and initiates the SCTP shutdown sequence. */ -SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) +static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; @@ -5700,8 +5695,8 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, return 0; } -SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int sctp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { int retval = 0; int len; @@ -6046,7 +6041,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) /* * Move a socket to LISTENING state. */ -SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) +static int sctp_listen_start(struct sock *sk, int backlog) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; @@ -6333,8 +6328,7 @@ static int sctp_autobind(struct sock *sk) * msg_control * points here */ -SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg, - sctp_cmsgs_t *cmsgs) +static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) { struct cmsghdr *cmsg; struct msghdr *my_msg = (struct msghdr *)msg; diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 396c45174e5..b46019568a8 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -161,8 +161,8 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, /* Initialize a Gap Ack Block iterator from memory being provided. 
*/ -SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, - struct sctp_tsnmap_iter *iter) +static void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, + struct sctp_tsnmap_iter *iter) { /* Only start looking one past the Cumulative TSN Ack Point. */ iter->start = map->cumulative_tsn_ack_point + 1; @@ -171,9 +171,9 @@ SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, /* Get the next Gap Ack Blocks. Returns 0 if there was not another block * to get. */ -SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, - struct sctp_tsnmap_iter *iter, - __u16 *start, __u16 *end) +static int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, + struct sctp_tsnmap_iter *iter, + __u16 *start, __u16 *end) { int ended = 0; __u16 start_ = 0, end_ = 0, offset; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 10c018a5b9f..44a45dbee4d 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -57,9 +57,9 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); /* Initialize an ULP event from an given skb. */ -SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, - int msg_flags, - unsigned int len) +static void sctp_ulpevent_init(struct sctp_ulpevent *event, + int msg_flags, + unsigned int len) { memset(event, 0, sizeof(struct sctp_ulpevent)); event->msg_flags = msg_flags; @@ -67,8 +67,8 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, } /* Create a new sctp_ulpevent. 
*/ -SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, - gfp_t gfp) +static struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, + gfp_t gfp) { struct sctp_ulpevent *event; struct sk_buff *skb; -- cgit v1.2.3 From a1193be83b4bb173228f04870afd6a4174b19130 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 14 Jun 2013 14:15:19 +0200 Subject: nl80211: use attributes to parse beacons only the attributes are required and not the whole netlink info, as the function accesses the attributes only anyway. This makes it easier to parse nested beacon IEs later. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 53 ++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e4028197b75..1c4f7daea6c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2882,61 +2882,58 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_parse_beacon(struct genl_info *info, +static int nl80211_parse_beacon(struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) { bool haveinfo = false; - if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_PROBE_RESP]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_ASSOC_RESP])) + if (!is_valid_ie_attr(attrs[NL80211_ATTR_BEACON_TAIL]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE_PROBE_RESP]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE_ASSOC_RESP])) return -EINVAL; memset(bcn, 0, sizeof(*bcn)); - if (info->attrs[NL80211_ATTR_BEACON_HEAD]) { - bcn->head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]); - bcn->head_len = nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]); + if 
(attrs[NL80211_ATTR_BEACON_HEAD]) { + bcn->head = nla_data(attrs[NL80211_ATTR_BEACON_HEAD]); + bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]); if (!bcn->head_len) return -EINVAL; haveinfo = true; } - if (info->attrs[NL80211_ATTR_BEACON_TAIL]) { - bcn->tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]); - bcn->tail_len = - nla_len(info->attrs[NL80211_ATTR_BEACON_TAIL]); + if (attrs[NL80211_ATTR_BEACON_TAIL]) { + bcn->tail = nla_data(attrs[NL80211_ATTR_BEACON_TAIL]); + bcn->tail_len = nla_len(attrs[NL80211_ATTR_BEACON_TAIL]); haveinfo = true; } if (!haveinfo) return -EINVAL; - if (info->attrs[NL80211_ATTR_IE]) { - bcn->beacon_ies = nla_data(info->attrs[NL80211_ATTR_IE]); - bcn->beacon_ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); + if (attrs[NL80211_ATTR_IE]) { + bcn->beacon_ies = nla_data(attrs[NL80211_ATTR_IE]); + bcn->beacon_ies_len = nla_len(attrs[NL80211_ATTR_IE]); } - if (info->attrs[NL80211_ATTR_IE_PROBE_RESP]) { + if (attrs[NL80211_ATTR_IE_PROBE_RESP]) { bcn->proberesp_ies = - nla_data(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); + nla_data(attrs[NL80211_ATTR_IE_PROBE_RESP]); bcn->proberesp_ies_len = - nla_len(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); + nla_len(attrs[NL80211_ATTR_IE_PROBE_RESP]); } - if (info->attrs[NL80211_ATTR_IE_ASSOC_RESP]) { + if (attrs[NL80211_ATTR_IE_ASSOC_RESP]) { bcn->assocresp_ies = - nla_data(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); + nla_data(attrs[NL80211_ATTR_IE_ASSOC_RESP]); bcn->assocresp_ies_len = - nla_len(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); + nla_len(attrs[NL80211_ATTR_IE_ASSOC_RESP]); } - if (info->attrs[NL80211_ATTR_PROBE_RESP]) { - bcn->probe_resp = - nla_data(info->attrs[NL80211_ATTR_PROBE_RESP]); - bcn->probe_resp_len = - nla_len(info->attrs[NL80211_ATTR_PROBE_RESP]); + if (attrs[NL80211_ATTR_PROBE_RESP]) { + bcn->probe_resp = nla_data(attrs[NL80211_ATTR_PROBE_RESP]); + bcn->probe_resp_len = nla_len(attrs[NL80211_ATTR_PROBE_RESP]); } return 0; @@ -3015,7 +3012,7 @@ static int 
nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) !info->attrs[NL80211_ATTR_BEACON_HEAD]) return -EINVAL; - err = nl80211_parse_beacon(info, &params.beacon); + err = nl80211_parse_beacon(info->attrs, &params.beacon); if (err) return err; @@ -3167,7 +3164,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (!wdev->beacon_interval) return -EINVAL; - err = nl80211_parse_beacon(info, &params); + err = nl80211_parse_beacon(info->attrs, &params); if (err) return err; -- cgit v1.2.3 From f81a9dedaff434604c7fc3d9c299d277b76db0a8 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 13 Jun 2013 15:54:41 -0700 Subject: mac80211: update mesh beacon on workqueue Instead of updating the mesh beacon immediately when requested (which would require the sdata_lock()), defer it to the mac80211 workqueue. Fixes yet another deadlock on calling sta_info_flush() with the sdata_lock() held from ieee80211_stop_mesh(). We could just drop the sdata_lock() around the mesh_sta_cleanup() call, but this path is also taken from several non-locked error paths.
Signed-off-by: Thomas Pedersen [fix comment position] Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh.c | 53 +++++++++++++++++++++++++++++++++++----------- net/mac80211/mesh.h | 2 ++ 3 files changed, 44 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a4dfb0be53d..194be3de16d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -544,6 +544,7 @@ struct ieee80211_if_mesh { struct timer_list mesh_path_root_timer; unsigned long wrkq_flags; + unsigned long mbss_changed; u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; size_t mesh_id_len; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6c33af482df..d5dea94216e 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,11 +161,8 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->plink_timer); } - if (changed) { - sdata_lock(sdata); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); - sdata_unlock(sdata); - } } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -719,14 +716,18 @@ ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata) void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed) { - if (sdata->vif.bss_conf.enable_beacon && - (changed & (BSS_CHANGED_BEACON | - BSS_CHANGED_HT | - BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT))) - if (ieee80211_mesh_rebuild_beacon(sdata)) - return; - ieee80211_bss_info_change_notify(sdata, changed); + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + unsigned long bits = changed; + u32 bit; + + if (!bits) + return; + + /* if we race with running work, worst case this work becomes a noop */ + for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE) + set_bit(bit, &ifmsh->mbss_changed); + set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } int ieee80211_start_mesh(struct 
ieee80211_sub_if_data *sdata) @@ -799,6 +800,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); + /* clear any mesh work (for next join) we may have accrued */ + ifmsh->wrkq_flags = 0; + ifmsh->mbss_changed = 0; + local->fif_other_bss--; atomic_dec(&local->iff_allmultis); ieee80211_configure_filter(local); @@ -965,6 +970,28 @@ out: sdata_unlock(sdata); } +static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u32 bit, changed = 0; + + for_each_set_bit(bit, &ifmsh->mbss_changed, + sizeof(changed) * BITS_PER_BYTE) { + clear_bit(bit, &ifmsh->mbss_changed); + changed |= BIT(bit); + } + + if (sdata->vif.bss_conf.enable_beacon && + (changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT))) + if (ieee80211_mesh_rebuild_beacon(sdata)) + return; + + ieee80211_bss_info_change_notify(sdata, changed); +} + void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; @@ -995,6 +1022,8 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags)) mesh_sync_adjust_tbtt(sdata); + if (test_and_clear_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags)) + mesh_bss_info_changed(sdata); out: sdata_unlock(sdata); } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 01a28bca6e9..2bc7fd2f787 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -58,6 +58,7 @@ enum mesh_path_flags { * @MESH_WORK_ROOT: the mesh root station needs to send a frame * @MESH_WORK_DRIFT_ADJUST: time to compensate for clock drift relative to other * mesh nodes + * @MESH_WORK_MBSS_CHANGED: rebuild beacon and notify driver of BSS changes */ enum mesh_deferred_task_flags { MESH_WORK_HOUSEKEEPING, @@ -65,6 +66,7 @@ enum mesh_deferred_task_flags { 
MESH_WORK_GROW_MPP_TABLE, MESH_WORK_ROOT, MESH_WORK_DRIFT_ADJUST, + MESH_WORK_MBSS_CHANGED, }; /** -- cgit v1.2.3 From 2f301ab29e4656af824592363039d8f6bd5a9f68 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 16 May 2013 13:00:28 +0200 Subject: nl80211/cfg80211: add 5 and 10 MHz defines and wiphy flag Add defines for 5 and 10 MHz channel width and fix channel handling functions accordingly. Also check for and report the WIPHY_FLAG_SUPPORTS_5_10_MHZ capability. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer [fix spelling in comment] Signed-off-by: Johannes Berg --- net/wireless/chan.c | 57 +++++++++++++++++++++++++++++++++++++++++++------- net/wireless/nl80211.c | 21 +++++++++++++++---- 2 files changed, 66 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index fd556ac05fd..50f6195c8b7 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -54,6 +54,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: if (chandef->center_freq1 != control_freq) @@ -152,6 +154,12 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) int width; switch (c->width) { + case NL80211_CHAN_WIDTH_5: + width = 5; + break; + case NL80211_CHAN_WIDTH_10: + width = 10; + break; case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: width = 20; @@ -194,6 +202,16 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, if (c1->width == c2->width) return NULL; + /* + * can't be compatible if one of them is 5 or 10 MHz, + * but they don't have the same width. 
+ */ + if (c1->width == NL80211_CHAN_WIDTH_5 || + c1->width == NL80211_CHAN_WIDTH_10 || + c2->width == NL80211_CHAN_WIDTH_5 || + c2->width == NL80211_CHAN_WIDTH_10) + return NULL; + if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || c1->width == NL80211_CHAN_WIDTH_20) return c2; @@ -264,11 +282,17 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, u32 bandwidth) { struct ieee80211_channel *c; - u32 freq; + u32 freq, start_freq, end_freq; + + if (bandwidth <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - bandwidth/2 + 10; + end_freq = center_freq + bandwidth/2 - 10; + } - for (freq = center_freq - bandwidth/2 + 10; - freq <= center_freq + bandwidth/2 - 10; - freq += 20) { + for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return -EINVAL; @@ -310,11 +334,17 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 prohibited_flags) { struct ieee80211_channel *c; - u32 freq; + u32 freq, start_freq, end_freq; + + if (bandwidth <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - bandwidth/2 + 10; + end_freq = center_freq + bandwidth/2 - 10; + } - for (freq = center_freq - bandwidth/2 + 10; - freq <= center_freq + bandwidth/2 - 10; - freq += 20) { + for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return false; @@ -349,6 +379,12 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + width = 5; + break; + case NL80211_CHAN_WIDTH_10: + width = 10; + break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported) return false; @@ -405,6 +441,11 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, if (width > 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; + /* 5 and 10 MHz are only defined for the OFDM PHY */ + if (width < 20) + prohibited_flags 
|= IEEE80211_CHAN_NO_OFDM; + + if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1, width, prohibited_flags)) return false; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1c4f7daea6c..4ab1ffa9df1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1188,6 +1188,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ)) + goto nla_put_failure; (*split_start)++; if (split) @@ -1731,6 +1734,11 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, IEEE80211_CHAN_DISABLED)) return -EINVAL; + if ((chandef->width == NL80211_CHAN_WIDTH_5 || + chandef->width == NL80211_CHAN_WIDTH_10) && + !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ)) + return -EINVAL; + return 0; } @@ -6280,11 +6288,16 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) return -EINVAL; - if (ibss.chandef.width > NL80211_CHAN_WIDTH_40) - return -EINVAL; - if (ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && - !(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + switch (ibss.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + break; + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS) + break; + default: return -EINVAL; + } ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; -- cgit v1.2.3 From 0418a445838749c51cf1e31a9c7ace6685ae87cd Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 16 May 2013 13:00:31 +0200 Subject: mac80211: fix various components for the new 5 and 10 MHz widths This is a collection of minor fixes: * don't allow HT IEs in IBSS for 5/10 MHz * 
don't allow HT IEs in Mesh for 5/10 MHz * don't downgrade from/to 5 and 10 MHz channels * don't try HT rates for 5 and 10 MHz channels when selecting rates Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 2 ++ net/mac80211/mesh.c | 4 +++- net/mac80211/mesh_plink.c | 8 +++++++- net/mac80211/mlme.c | 12 ++++++++++++ net/mac80211/rate.c | 8 +++++++- 5 files changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index caa4b4f7f6e..3789c85282a 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -176,6 +176,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, /* add HT capability and information IEs */ if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + chandef.width != NL80211_CHAN_WIDTH_5 && + chandef.width != NL80211_CHAN_WIDTH_10 && sband->ht_cap.ht_supported) { pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, sband->ht_cap.cap); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d5dea94216e..447f41bbe74 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -416,7 +416,9 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[band]; if (!sband->ht_cap.ht_supported || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 09bebed9941..02c05fa15c2 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -154,8 +154,14 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) u16 ht_opmode; bool non_ht_sta = false, ht20_sta = false; - if 
(sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + switch (sdata->vif.bss_conf.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: return 0; + default: + break; + } rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 87f2d4df31f..e0939eb7906 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -190,6 +190,12 @@ static u32 chandef_downgrade(struct cfg80211_chan_def *c) c->width = NL80211_CHAN_WIDTH_20_NOHT; ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; break; + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + WARN_ON_ONCE(1); + /* keep c->width */ + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; } WARN_ON_ONCE(!cfg80211_chandef_valid(c)); @@ -3771,6 +3777,12 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, */ ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); + + /* don't downgrade for 5 and 10 MHz channels, though. 
*/ + if (chandef.width == NL80211_CHAN_WIDTH_5 || + chandef.width == NL80211_CHAN_WIDTH_10) + return ret; + while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { ifmgd->flags |= chandef_downgrade(&chandef); ret = ieee80211_vif_use_channel(sdata, &chandef, diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d3f414fe67e..dbbcd57b0fc 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -397,8 +397,14 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, return; /* if HT BSS, and we handle a data frame, also try HT rates */ - if (chan_width == NL80211_CHAN_WIDTH_20_NOHT) + switch (chan_width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: return; + default: + break; + } alt_rate.idx = 0; /* keep protection flags */ -- cgit v1.2.3 From 3aede78aad2a7e39a81b4b0caa771d40254a6787 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 16 May 2013 13:00:36 +0200 Subject: mac80211: change IBSS channel state to chandef This should make some parts cleaner and is also required for handling 5/10 MHz properly. Signed-off-by: Simon Wunderlich Signed-off-by: Mathias Kretschmer Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 22 +++++++++++----------- net/mac80211/ieee80211_i.h | 3 +-- 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 3789c85282a..eaacfd27061 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -81,7 +81,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 
1 : 0; - cfg80211_chandef_create(&chandef, chan, ifibss->channel_type); + chandef = ifibss->chandef; if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { chandef.width = NL80211_CHAN_WIDTH_20; chandef.center_freq1 = chan->center_freq; @@ -516,7 +516,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, set_sta_flag(sta, WLAN_STA_WME); if (sta && elems->ht_operation && elems->ht_cap_elem && - sdata->u.ibss.channel_type != NL80211_CHAN_NO_HT) { + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_5 && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_10) { /* we both use HT */ struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; @@ -531,8 +533,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * fall back to HT20 if we don't use or use * the other extension channel */ - if (cfg80211_get_chandef_type(&chandef) != - sdata->u.ibss.channel_type) + if (chandef.center_freq1 != + sdata->u.ibss.chandef.center_freq1) htcap_ie.cap_info &= cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40); @@ -571,7 +573,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* different channel */ if (sdata->u.ibss.fixed_channel && - sdata->u.ibss.channel != cbss->channel) + sdata->u.ibss.chandef.chan != cbss->channel) goto put_bss; /* different SSID */ @@ -761,7 +763,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) sdata->drop_unencrypted = 0; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, - ifibss->channel, ifibss->basic_rates, + ifibss->chandef.chan, ifibss->basic_rates, capability, 0, true); } @@ -793,7 +795,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) if (ifibss->fixed_bssid) bssid = ifibss->bssid; if (ifibss->fixed_channel) - chan = ifibss->channel; + chan = ifibss->chandef.chan; if (!is_zero_ether_addr(ifibss->bssid)) bssid = ifibss->bssid; cbss = 
cfg80211_get_bss(local->hw.wiphy, chan, bssid, @@ -1060,9 +1062,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.beacon_int = params->beacon_interval; - sdata->u.ibss.channel = params->chandef.chan; - sdata->u.ibss.channel_type = - cfg80211_get_chandef_type(&params->chandef); + sdata->u.ibss.chandef = params->chandef; sdata->u.ibss.fixed_channel = params->channel_fixed; if (params->ie) { @@ -1121,7 +1121,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; - cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->channel, + cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, ifibss->ssid_len, WLAN_CAPABILITY_IBSS | WLAN_CAPABILITY_PRIVACY, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 194be3de16d..1bfc3955005 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -504,8 +504,7 @@ struct ieee80211_if_ibss { u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len, ie_len; u8 *ie; - struct ieee80211_channel *channel; - enum nl80211_channel_type channel_type; + struct cfg80211_chan_def chandef; unsigned long ibss_join_req; /* probe response/beacon for IBSS */ -- cgit v1.2.3 From 52874a5e3917dde3b081521b014d6e4b226aacff Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 18 Jun 2013 14:20:40 +0200 Subject: Revert "mac80211: in IBSS use the Auth frame to trigger STA reinsertion" This reverts commit 6d810f10325522cfcf498dc6d64b9f96e1f5153f In this way an IBSS station will not use the AUTH messages to trigger a state reinitialisation anymore. The behaviour was racy and was not working properly. It has been introduced to help wpa_supplicant to support IBSS/RSN, however all the logic is now getting moved into wpa_s itself which will also be in charge of handling the AUTH messages thanks to the mgmt frame registration.
If userspace does not register for receiving AUTH frames then mac80211 will still reply by itself. At the same time, the auth frame registration counter can be removed since it is not needed anymore. Signed-off-by: Antonio Quartulli [remove unused variable] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 11 ----------- net/mac80211/ibss.c | 41 +++++++---------------------------------- net/mac80211/ieee80211_i.h | 1 - 3 files changed, 7 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 64cf294c2b9..18ba7ed3ef0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2924,19 +2924,8 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, u16 frame_type, bool reg) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); switch (frame_type) { - case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH: - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - - if (reg) - ifibss->auth_frame_registrations++; - else - ifibss->auth_frame_registrations--; - } - break; case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ: if (reg) local->probe_req_reg++; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index eaacfd27061..ea7b9c2c7e6 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -300,8 +300,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, tsf, false); } -static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, - bool auth) +static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta) __acquires(RCU) { struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -323,20 +322,12 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, /* If it fails, maybe we raced another insertion? 
*/ if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); - if (auth && !sdata->u.ibss.auth_frame_registrations) { - ibss_dbg(sdata, - "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, addr, sdata->u.ibss.bssid); - ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, 0, NULL, 0, - addr, sdata->u.ibss.bssid, NULL, 0, 0, 0); - } return sta; } static struct sta_info * -ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, const u8 *addr, - u32 supp_rates, bool auth) +ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, + const u8 *addr, u32 supp_rates) __acquires(RCU) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; @@ -387,7 +378,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, sta->sta.supp_rates[band] = supp_rates | ieee80211_mandatory_rates(sband); - return ieee80211_ibss_finish_sta(sta, auth); + return ieee80211_ibss_finish_sta(sta); } static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, @@ -409,8 +400,6 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, size_t len) { u16 auth_alg, auth_transaction; - struct sta_info *sta; - u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; sdata_assert_lock(sdata); @@ -427,22 +416,6 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; - sta_info_destroy_addr(sdata, mgmt->sa); - sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); - rcu_read_unlock(); - - /* - * if we have any problem in allocating the new station, we reply with a - * DEAUTH frame to tell the other end that we had a problem - */ - if (!sta) { - ieee80211_send_deauth_disassoc(sdata, sdata->u.ibss.bssid, - IEEE80211_STYPE_DEAUTH, - WLAN_REASON_UNSPECIFIED, true, - deauth_frame_buf); - return; - } - /* * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not 
seem to use it. @@ -508,7 +481,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } else { rcu_read_unlock(); sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, - mgmt->sa, supp_rates, true); + mgmt->sa, supp_rates); } } @@ -614,7 +587,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, - supp_rates, true); + supp_rates); rcu_read_unlock(); } @@ -986,7 +959,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) list_del(&sta->list); spin_unlock_bh(&ifibss->incomplete_lock); - ieee80211_ibss_finish_sta(sta, true); + ieee80211_ibss_finish_sta(sta); rcu_read_unlock(); spin_lock_bh(&ifibss->incomplete_lock); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1bfc3955005..00d71e9a8fb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -498,7 +498,6 @@ struct ieee80211_if_ibss { bool privacy; bool control_port; - unsigned int auth_frame_registrations; u8 bssid[ETH_ALEN] __aligned(2); u8 ssid[IEEE80211_MAX_SSID_LEN]; -- cgit v1.2.3 From e401452d923de5b27f61f707773ec38f5593d985 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Jun 2013 09:10:29 -0400 Subject: rpc_pipefs: only set rpc_dentry_ops if d_op isn't already set We had a report of a reproducible WARNING: [ 1360.039358] ------------[ cut here ]------------ [ 1360.043978] WARNING: at fs/dcache.c:1355 d_set_d_op+0x8d/0xc0() [ 1360.049880] Hardware name: HP Z200 Workstation [ 1360.054308] Modules linked in: nfsv4 nfs dns_resolver fscache nfsd auth_rpcgss nfs_acl lockd sunrpc sg acpi_cpufreq mperf coretemp kvm_intel kvm snd_hda_codec_realtek snd_hda_intel snd_hda_codec hp_wmi crc32c_intel snd_hwdep e1000e snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer snd sparse_keymap rfkill soundcore serio_raw ptp iTCO_wdt pps_core pcspkr iTCO_vendor_support 
mei microcode lpc_ich mfd_core wmi xfs libcrc32c sr_mod sd_mod cdrom crc_t10dif radeon i2c_algo_bit drm_kms_helper ttm ahci libahci drm i2c_core libata dm_mirror dm_region_hash dm_log dm_mod [last unloaded: auth_rpcgss] [ 1360.107406] Pid: 8814, comm: mount.nfs4 Tainted: G I -------------- 3.9.0-0.55.el7.x86_64 #1 [ 1360.116771] Call Trace: [ 1360.119219] [] warn_slowpath_common+0x70/0xa0 [ 1360.125208] [] warn_slowpath_null+0x1a/0x20 [ 1360.131025] [] d_set_d_op+0x8d/0xc0 [ 1360.136159] [] __rpc_lookup_create_exclusive+0x4f/0x80 [sunrpc] [ 1360.143710] [] rpc_mkpipe_dentry+0x86/0x170 [sunrpc] [ 1360.150311] [] nfs_idmap_new+0x96/0x130 [nfsv4] [ 1360.156475] [] nfs4_init_client+0xad/0x2d0 [nfsv4] [ 1360.162902] [] ? idr_get_empty_slot+0x16f/0x3c0 [ 1360.169062] [] ? idr_mark_full+0x52/0x60 [ 1360.174615] [] ? idr_alloc+0x79/0xe0 [ 1360.179826] [] ? __rpc_init_priority_wait_queue+0x81/0xc0 [sunrpc] [ 1360.187635] [] ? rpc_init_wait_queue+0x13/0x20 [sunrpc] [ 1360.194493] [] nfs_get_client+0x27a/0x350 [nfs] [ 1360.200666] [] nfs4_set_client.isra.8+0x78/0x100 [nfsv4] [ 1360.207624] [] nfs4_create_server+0xf3/0x3a0 [nfsv4] [ 1360.214222] [] nfs4_remote_mount+0x2e/0x60 [nfsv4] [ 1360.220644] [] mount_fs+0x39/0x1b0 [ 1360.225691] [] ? __alloc_percpu+0x10/0x20 [ 1360.231348] [] vfs_kern_mount+0x5f/0xf0 [ 1360.236822] [] nfs_do_root_mount+0x86/0xc0 [nfsv4] [ 1360.243246] [] nfs4_try_mount+0x44/0xc0 [nfsv4] [ 1360.249410] [] ? get_nfs_version+0x27/0x80 [nfs] [ 1360.255659] [] nfs_fs_mount+0x5c5/0xd10 [nfs] [ 1360.261650] [] ? nfs_clone_super+0x140/0x140 [nfs] [ 1360.268074] [] ? param_set_portnr+0x60/0x60 [nfs] [ 1360.274406] [] mount_fs+0x39/0x1b0 [ 1360.279443] [] ? __alloc_percpu+0x10/0x20 [ 1360.285088] [] vfs_kern_mount+0x5f/0xf0 [ 1360.290556] [] do_mount+0x1fd/0xa00 [ 1360.295677] [] ? __get_free_pages+0xe/0x50 [ 1360.301405] [] ? 
copy_mount_options+0x36/0x170 [ 1360.307479] [] sys_mount+0x83/0xc0 [ 1360.312515] [] system_call_fastpath+0x16/0x1b [ 1360.318503] ---[ end trace 8fa1f4cbc36094a7 ]--- The problem is that we're ending up in __rpc_lookup_create_exclusive with a negative dentry that already has d_op set. A little debugging has shown that when we hit this, the d_ops are already set to simple_dentry_operations. I believe that what's happening is that during a mount, idmapd is racing in and doing a lookup of /var/lib/nfs/rpc_pipefs/nfs/clnt???/idmap. Before that dentry reference is released, the kernel races in to create that file and finds the new negative dentry, which already has the d_op set. This patch just avoids setting the d_op if it's already set. simple_dentry_operations and rpc_dentry_operations are functionally equivalent so it shouldn't matter which one it's set to. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e7ce4b3eb0b..a816b3a6905 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -667,7 +667,8 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, return ERR_PTR(-ENOMEM); } if (dentry->d_inode == NULL) { - d_set_d_op(dentry, &rpc_dentry_operations); + if (!dentry->d_op) + d_set_d_op(dentry, &rpc_dentry_operations); return dentry; } dput(dentry); -- cgit v1.2.3 From 86e8cf98de3e74bbfb0003501e0004bf1e5e2618 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Jun 2013 10:57:22 +0200 Subject: nl80211: use small state buffer for wiphy_dump Avoid parsing the original dump message again and again by allocating a small state struct that is used by the functions involved in the dump, storing this struct in cb->args[0]. This reduces the memory allocation size as well. 
Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 210 +++++++++++++++++++++++++++---------------------- 1 file changed, 116 insertions(+), 94 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f8ffb9a59c8..7dc3343427c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1111,10 +1111,16 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, return 0; } +struct nl80211_dump_wiphy_state { + s64 filter_wiphy; + long start; + long split_start, band_start, chan_start; + bool split; +}; + static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, struct sk_buff *msg, u32 portid, u32 seq, - int flags, bool split, long *split_start, - long *band_start, long *chan_start) + int flags, struct nl80211_dump_wiphy_state *state) { void *hdr; struct nlattr *nl_bands, *nl_band; @@ -1125,19 +1131,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, int i; const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; - long start = 0, start_chan = 0, start_band = 0; u32 features; hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -ENOBUFS; - /* allow always using the variables */ - if (!split) { - split_start = &start; - band_start = &start_band; - chan_start = &start_chan; - } + if (WARN_ON(!state)) + return -EINVAL; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, @@ -1146,7 +1147,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, cfg80211_rdev_list_generation)) goto nla_put_failure; - switch (*split_start) { + switch (state->split_start) { case 0: if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, dev->wiphy.retry_short) || @@ -1192,8 +1193,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if 
(state->split) break; case 1: if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, @@ -1237,22 +1238,23 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } } - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 2: if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, dev->wiphy.interface_modes)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 3: nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; - for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) { + for (band = state->band_start; + band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; sband = dev->wiphy.bands[band]; @@ -1264,12 +1266,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nl_band) goto nla_put_failure; - switch (*chan_start) { + switch (state->chan_start) { case 0: if (nl80211_send_band_rateinfo(msg, sband)) goto nla_put_failure; - (*chan_start)++; - if (split) + state->chan_start++; + if (state->split) break; default: /* add frequencies */ @@ -1278,7 +1280,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nl_freqs) goto nla_put_failure; - for (i = *chan_start - 1; + for (i = state->chan_start - 1; i < sband->n_channels; i++) { nl_freq = nla_nest_start(msg, i); @@ -1287,26 +1289,27 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, chan = &sband->channels[i]; - if (nl80211_msg_put_channel(msg, chan, - split)) + if (nl80211_msg_put_channel( + msg, chan, + state->split)) goto nla_put_failure; nla_nest_end(msg, nl_freq); - if (split) + if (state->split) break; } if (i < sband->n_channels) - *chan_start = i + 2; + state->chan_start = i + 2; else - *chan_start = 0; + state->chan_start = 0; nla_nest_end(msg, nl_freqs); } nla_nest_end(msg, nl_band); - if (split) { + if (state->split) { /* start again here */ - if (*chan_start) + if 
(state->chan_start) band--; break; } @@ -1314,14 +1317,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, nla_nest_end(msg, nl_bands); if (band < IEEE80211_NUM_BANDS) - *band_start = band + 1; + state->band_start = band + 1; else - *band_start = 0; + state->band_start = 0; /* if bands & channels are done, continue outside */ - if (*band_start == 0 && *chan_start == 0) - (*split_start)++; - if (split) + if (state->band_start == 0 && state->chan_start == 0) + state->split_start++; + if (state->split) break; case 4: nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); @@ -1387,7 +1390,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); - if (split) { + if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); } @@ -1411,8 +1414,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } nla_nest_end(msg, nl_cmds); - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 5: if (dev->ops->remain_on_channel && @@ -1428,29 +1431,30 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 6: #ifdef CONFIG_PM - if (nl80211_send_wowlan(msg, dev, split)) + if (nl80211_send_wowlan(msg, dev, state->split)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; #else - (*split_start)++; + state->split_start++; #endif case 7: if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, dev->wiphy.software_iftypes)) goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg, split)) + if (nl80211_put_iface_combinations(&dev->wiphy, msg, + state->split)) goto nla_put_failure; - (*split_start)++; - if (split) + 
state->split_start++; + if (state->split) break; case 8: if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && @@ -1464,7 +1468,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, * dump is split, otherwise it makes it too big. Therefore * only advertise it in that case. */ - if (split) + if (state->split) features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) goto nla_put_failure; @@ -1491,7 +1495,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, * case we'll continue with more data in the next round, * but break unconditionally so unsplit data stops here. */ - (*split_start)++; + state->split_start++; break; case 9: if (dev->wiphy.extended_capabilities && @@ -1510,7 +1514,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, goto nla_put_failure; /* done */ - *split_start = 0; + state->split_start = 0; break; } return genlmsg_end(msg, hdr); @@ -1520,66 +1524,76 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, return -EMSGSIZE; } +static int nl80211_dump_wiphy_parse(struct sk_buff *skb, + struct netlink_callback *cb, + struct nl80211_dump_wiphy_state *state) +{ + struct nlattr **tb = nl80211_fam.attrbuf; + int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, nl80211_policy); + /* ignore parse errors for backward compatibility */ + if (ret) + return 0; + + state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; + if (tb[NL80211_ATTR_WIPHY]) + state->filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); + if (tb[NL80211_ATTR_WDEV]) + state->filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; + if (tb[NL80211_ATTR_IFINDEX]) { + struct net_device *netdev; + struct cfg80211_registered_device *rdev; + int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); + + netdev = dev_get_by_index(sock_net(skb->sk), ifidx); + if (!netdev) + return -ENODEV; + if (netdev->ieee80211_ptr) { + rdev = 
wiphy_to_dev( + netdev->ieee80211_ptr->wiphy); + state->filter_wiphy = rdev->wiphy_idx; + } + dev_put(netdev); + } + + return 0; +} + static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; - int start = cb->args[0]; + struct nl80211_dump_wiphy_state *state = (void *)cb->args[0]; struct cfg80211_registered_device *dev; - s64 filter_wiphy = -1; - bool split = false; - struct nlattr **tb; - int res; - - /* will be zeroed in nlmsg_parse() */ - tb = kmalloc(sizeof(*tb) * (NL80211_ATTR_MAX + 1), GFP_KERNEL); - if (!tb) - return -ENOMEM; rtnl_lock(); - res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - tb, NL80211_ATTR_MAX, nl80211_policy); - if (res == 0) { - split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; - if (tb[NL80211_ATTR_WIPHY]) - filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); - if (tb[NL80211_ATTR_WDEV]) - filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; - if (tb[NL80211_ATTR_IFINDEX]) { - struct net_device *netdev; - int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); - - netdev = dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) { - rtnl_unlock(); - kfree(tb); - return -ENODEV; - } - if (netdev->ieee80211_ptr) { - dev = wiphy_to_dev( - netdev->ieee80211_ptr->wiphy); - filter_wiphy = dev->wiphy_idx; - } - dev_put(netdev); + if (!state) { + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + state->filter_wiphy = -1; + ret = nl80211_dump_wiphy_parse(skb, cb, state); + if (ret) { + kfree(state); + rtnl_unlock(); + return ret; } + cb->args[0] = (long)state; } - kfree(tb); list_for_each_entry(dev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) continue; - if (++idx <= start) + if (++idx <= state->start) continue; - if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy) + if (state->filter_wiphy != -1 && + state->filter_wiphy != dev->wiphy_idx) continue; /* attempt to fit multiple wiphy data chunks into the skb */ do { ret = 
nl80211_send_wiphy(dev, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, - split, &cb->args[1], - &cb->args[2], - &cb->args[3]); + NLM_F_MULTI, state); if (ret < 0) { /* * If sending the wiphy data didn't fit (ENOBUFS @@ -1604,27 +1618,34 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) idx--; break; } - } while (cb->args[1] > 0); + } while (state->split_start > 0); break; } rtnl_unlock(); - cb->args[0] = idx; + state->start = idx; return skb->len; } +static int nl80211_dump_wiphy_done(struct netlink_callback *cb) +{ + kfree((void *)cb->args[0]); + return 0; +} + static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, - false, NULL, NULL, NULL) < 0) { + &state) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -8418,6 +8439,7 @@ static struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_WIPHY, .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, + .done = nl80211_dump_wiphy_done, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | @@ -9038,13 +9060,13 @@ static struct genl_multicast_group nl80211_regulatory_mcgrp = { void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) { struct sk_buff *msg; + struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, - false, NULL, NULL, NULL) < 0) { + if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) { nlmsg_free(msg); return; } -- cgit v1.2.3 From 959867fa55d0cb55fb3d08656e5e62607167617f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Jun 2013 13:05:42 +0200 Subject: cfg80211: require passing BSS struct back 
to cfg80211_assoc_timeout Doing so will allow us to hold the BSS (not just ref it) over the association process, thus ensuring that it doesn't time out and become invisible to the user (e.g. in 'iw wlan0 link'.) This also fixes a leak in mac80211 where it doesn't always release the BSS struct properly in all cases when calling this function. This leak was reported by Ben Greear. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 15 +++++++-------- net/wireless/mlme.c | 8 +++++--- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 34d54fe8148..ae31968d42d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2795,8 +2795,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(sdata->local->hw.wiphy, bss); - cfg80211_assoc_timeout(sdata->dev, mgmt->bssid); + cfg80211_assoc_timeout(sdata->dev, bss); return; } sdata_info(sdata, "associated\n"); @@ -3513,13 +3512,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) time_after(jiffies, ifmgd->assoc_data->timeout)) { if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) || ieee80211_do_assoc(sdata)) { - u8 bssid[ETH_ALEN]; - - memcpy(bssid, ifmgd->assoc_data->bss->bssid, ETH_ALEN); + struct cfg80211_bss *bss = ifmgd->assoc_data->bss; ieee80211_destroy_assoc_data(sdata, false); - - cfg80211_assoc_timeout(sdata->dev, bssid); + cfg80211_assoc_timeout(sdata->dev, bss); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) run_again(sdata, ifmgd->assoc_data->timeout); @@ -4445,8 +4441,11 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->chswitch_work); sdata_lock(sdata); - if (ifmgd->assoc_data) + if (ifmgd->assoc_data) { + struct cfg80211_bss *bss = 
ifmgd->assoc_data->bss; ieee80211_destroy_assoc_data(sdata, false); + cfg80211_assoc_timeout(sdata->dev, bss); + } if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); del_timer_sync(&ifmgd->timer); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index a61a44bc6cf..dd6f79d7bd2 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -131,16 +131,18 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) } EXPORT_SYMBOL(cfg80211_auth_timeout); -void cfg80211_assoc_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_send_assoc_timeout(dev, addr); + trace_cfg80211_send_assoc_timeout(dev, bss->bssid); - nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); + nl80211_send_assoc_timeout(rdev, dev, bss->bssid, GFP_KERNEL); cfg80211_sme_assoc_timeout(wdev); + + cfg80211_put_bss(wiphy, bss); } EXPORT_SYMBOL(cfg80211_assoc_timeout); -- cgit v1.2.3 From f1940c5730f0f0555e42afbcf629be7f7fbbce8e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Jun 2013 13:21:15 +0200 Subject: cfg80211: hold BSS over association process This fixes the potential issue that the BSS struct that we use and later assign to wdev->current_bss is removed from the scan list while associating. Also warn when we don't have a BSS struct in connect_result unless it's from a driver that only has the connect() API. 
Signed-off-by: Johannes Berg --- net/wireless/mlme.c | 4 ++++ net/wireless/sme.c | 15 ++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index dd6f79d7bd2..bfac5e186f5 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -38,6 +38,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, * frame instead of reassoc. */ if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) { + cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wiphy, bss); return; } @@ -142,6 +143,7 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) nl80211_send_assoc_timeout(rdev, dev, bss->bssid, GFP_KERNEL); cfg80211_sme_assoc_timeout(wdev); + cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wiphy, bss); } EXPORT_SYMBOL(cfg80211_assoc_timeout); @@ -309,6 +311,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, goto out; err = rdev_assoc(rdev, dev, req); + if (!err) + cfg80211_hold_bss(bss_from_pub(req->bss)); out: if (err) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ae7e2cbf45c..c0bf781d4fb 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -615,19 +615,24 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, kfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; - cfg80211_put_bss(wdev->wiphy, bss); + if (bss) { + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wdev->wiphy, bss); + } return; } - if (!bss) + if (!bss) { + WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (WARN_ON(!bss)) - return; + if (WARN_ON(!bss)) + return; + cfg80211_hold_bss(bss_from_pub(bss)); + } - cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); cfg80211_upload_connect_keys(wdev); -- cgit v1.2.3 From 
20fd4d1f04da07d09192ad8ad366a70d5125bfaf Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:32 -0700 Subject: gre: Simplify gre protocol registration locking. Use cmpxchg() for atomic protocol registration which saves code and data space. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index b2e805af9b8..1e294d510ac 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -26,46 +26,32 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; -static DEFINE_SPINLOCK(gre_proto_lock); int gre_add_protocol(const struct gre_protocol *proto, u8 version) { if (version >= GREPROTO_MAX) - goto err_out; - - spin_lock(&gre_proto_lock); - if (gre_proto[version]) - goto err_out_unlock; - - RCU_INIT_POINTER(gre_proto[version], proto); - spin_unlock(&gre_proto_lock); - return 0; + return -EINVAL; -err_out_unlock: - spin_unlock(&gre_proto_lock); -err_out: - return -1; + return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? + 0 : -EBUSY; } EXPORT_SYMBOL_GPL(gre_add_protocol); int gre_del_protocol(const struct gre_protocol *proto, u8 version) { + int ret; + if (version >= GREPROTO_MAX) - goto err_out; - - spin_lock(&gre_proto_lock); - if (rcu_dereference_protected(gre_proto[version], - lockdep_is_held(&gre_proto_lock)) != proto) - goto err_out_unlock; - RCU_INIT_POINTER(gre_proto[version], NULL); - spin_unlock(&gre_proto_lock); + return -EINVAL; + + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? 
+ 0 : -EBUSY; + + if (ret) + return ret; + synchronize_rcu(); return 0; - -err_out_unlock: - spin_unlock(&gre_proto_lock); -err_out: - return -1; } EXPORT_SYMBOL_GPL(gre_del_protocol); -- cgit v1.2.3 From bda7bb46343647f68591366731295a0f3eea59ed Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:38 -0700 Subject: gre: Allow multiple protocol listener for gre protocol. Currently there is only one user is allowed to register for gre protocol. Following patch adds de-multiplexer. So that multiple modules can listen on gre protocol e.g. kernel gre devices and ovs. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/ip_gre.c | 173 +++++++----------------------------------- 2 files changed, 243 insertions(+), 151 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 1e294d510ac..8b9a373890a 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -13,6 +13,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include +#include #include #include #include @@ -24,8 +26,12 @@ #include #include +#include +#include +#include static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; +static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -55,6 +61,173 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + + if (!csum) + break; + /* Fall through. 
*/ + + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; + } + + return csum; +} + +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err) +{ + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = ip_gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; + + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; + + /* WCCP version 1 and 2 protocol decoding. 
+ * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + hdr_len += 4; + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + } + } + return 0; +} + +static int gre_cisco_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + int i; + bool csum_err = false; + + if (parse_gre_header(skb, &tpi, &csum_err) < 0) + goto drop; + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + int ret; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + ret = proto->handler(skb, &tpi); + if (ret == PACKET_RCVD) { + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +drop: + kfree_skb(skb); + return 0; +} + +static void gre_cisco_err(struct sk_buff *skb, u32 info) +{ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + * + * Moreover, Cisco "wise men" put GRE key to the third word + * in GRE header. It makes impossible maintaining even soft + * state for keyed + * GRE tunnels with enabled checksum. Tell them "thank you". + * + * Well, I wonder, rfc1812 was written by Cisco employee, + * what the hell these idiots break standards established + * by themselves??? + */ + + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct tnl_ptk_info tpi; + bool csum_err = false; + int i; + + if (parse_gre_header(skb, &tpi, &csum_err)) { + if (!csum_err) /* ignore csum errors. 
*/ + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + skb->dev->ifindex, 0, IPPROTO_GRE, 0); + return; + } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, + IPPROTO_GRE, 0); + return; + } + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + + if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) + goto out; + + } +out: + rcu_read_unlock(); +} + static int gre_rcv(struct sk_buff *skb) { const struct gre_protocol *proto; @@ -206,27 +379,68 @@ static const struct net_offload gre_offload = { }, }; +static const struct gre_protocol ipgre_protocol = { + .handler = gre_cisco_rcv, + .err_handler = gre_cisco_err, +}; + +int gre_cisco_register(struct gre_cisco_protocol *newp) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[newp->priority]; + + return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_cisco_register); + +int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[del_proto->priority]; + int ret; + + ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 
0 : -EINVAL; + + if (ret) + return ret; + + synchronize_net(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_cisco_unregister); + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { pr_err("can't add protocol\n"); - return -EAGAIN; + goto err; + } + + if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { + pr_info("%s: can't add ipgre handler\n", __func__); + goto err_gre; } if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { pr_err("can't add protocol offload\n"); - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); - return -EAGAIN; + goto err_gso; } return 0; +err_gso: + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); +err_gre: + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +err: + return -EAGAIN; } static void __exit gre_exit(void) { inet_del_offload(&gre_offload, IPPROTO_GRE); + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } @@ -236,4 +450,3 @@ module_exit(gre_exit); MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); MODULE_LICENSE("GPL"); - diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a982657d05e..19863a81cea 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -121,103 +121,8 @@ static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_net_id __read_mostly; static int gre_tap_net_id __read_mostly; -static __sum16 check_checksum(struct sk_buff *skb) -{ - __sum16 csum = 0; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - - if (!csum) - break; - /* Fall through. 
*/ - - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - break; - } - - return csum; -} - -static int ip_gre_calc_hlen(__be16 o_flags) -{ - int addend = 4; - - if (o_flags&TUNNEL_CSUM) - addend += 4; - if (o_flags&TUNNEL_KEY) - addend += 4; - if (o_flags&TUNNEL_SEQ) - addend += 4; - return addend; -} - -static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, int *hdr_len) -{ - unsigned int ip_hlen = ip_hdrlen(skb); - const struct gre_base_hdr *greh; - __be32 *options; - - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) - return -EINVAL; - - tpi->flags = gre_flags_to_tnl_flags(greh->flags); - *hdr_len = ip_gre_calc_hlen(tpi->flags); - - if (!pskb_may_pull(skb, *hdr_len)) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - - tpi->proto = greh->protocol; - - options = (__be32 *)(greh + 1); - if (greh->flags & GRE_CSUM) { - if (check_checksum(skb)) { - *csum_err = true; - return -EINVAL; - } - options++; - } - - if (greh->flags & GRE_KEY) { - tpi->key = *options; - options++; - } else - tpi->key = 0; - - if (unlikely(greh->flags & GRE_SEQ)) { - tpi->seq = *options; - options++; - } else - tpi->seq = 0; - - /* WCCP version 1 and 2 protocol decoding. 
- * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); - if ((*(u8 *)options & 0xF0) != 0x40) { - *hdr_len += 4; - if (!pskb_may_pull(skb, *hdr_len)) - return -EINVAL; - } - } - - return 0; -} - -static void ipgre_err(struct sk_buff *skb, u32 info) +static int ipgre_err(struct sk_buff *skb, u32 info, + const struct tnl_ptk_info *tpi) { /* All the routers (except for Linux) return only @@ -239,26 +144,18 @@ static void ipgre_err(struct sk_buff *skb, u32 info) const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - struct tnl_ptk_info tpi; - int hdr_len; - bool csum_err = false; - - if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { - if (!csum_err) /* ignore csum errors. */ - return; - } switch (type) { default: case ICMP_PARAMETERPROB: - return; + return PACKET_RCVD; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: case ICMP_PORT_UNREACH: /* Impossible event. */ - return; + return PACKET_RCVD; default: /* All others are translated to HOST_UNREACH. 
rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -269,79 +166,61 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) - return; + return PACKET_RCVD; break; case ICMP_REDIRECT: break; } - if (tpi.proto == htons(ETH_P_TEB)) + if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); else itn = net_generic(net, ipgre_net_id); iph = (const struct iphdr *)skb->data; - t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, - iph->daddr, iph->saddr, tpi.key); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, + iph->daddr, iph->saddr, tpi->key); if (t == NULL) - return; + return PACKET_REJECT; - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_GRE, 0); - return; - } - if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_GRE, 0); - return; - } if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) - return; + return PACKET_RCVD; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - return; + return PACKET_RCVD; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; + return PACKET_RCVD; } -static int ipgre_rcv(struct sk_buff *skb) +static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn; const struct iphdr *iph; struct ip_tunnel *tunnel; - struct tnl_ptk_info tpi; - int hdr_len; - bool csum_err = false; - - if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) - goto drop; - if (tpi.proto == htons(ETH_P_TEB)) + if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); else itn = net_generic(net, ipgre_net_id); iph = ip_hdr(skb); - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, - iph->saddr, iph->daddr, tpi.key); + tunnel = ip_tunnel_lookup(itn, 
skb->dev->ifindex, tpi->flags, + iph->saddr, iph->daddr, tpi->key); if (tunnel) { - ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); - return 0; + ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); + return PACKET_RCVD; } - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); -drop: - kfree_skb(skb); - return 0; + return PACKET_REJECT; } static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb) @@ -708,9 +587,10 @@ static int ipgre_tunnel_init(struct net_device *dev) return ip_tunnel_init(dev); } -static const struct gre_protocol ipgre_protocol = { - .handler = ipgre_rcv, - .err_handler = ipgre_err, +static struct gre_cisco_protocol ipgre_protocol = { + .handler = ipgre_rcv, + .err_handler = ipgre_err, + .priority = 0, }; static int __net_init ipgre_init_net(struct net *net) @@ -978,7 +858,7 @@ static int __init ipgre_init(void) if (err < 0) goto pnet_tap_faied; - err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); + err = gre_cisco_register(&ipgre_protocol); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); goto add_proto_failed; @@ -997,7 +877,7 @@ static int __init ipgre_init(void) tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); + gre_cisco_unregister(&ipgre_protocol); add_proto_failed: unregister_pernet_device(&ipgre_tap_net_ops); pnet_tap_faied: @@ -1009,8 +889,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) - pr_info("%s: can't remove protocol\n", __func__); + gre_cisco_unregister(&ipgre_protocol); unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } -- cgit v1.2.3 From 752f36da68e9136df8918461d651723a43627e04 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:45 -0700 Subject: gre: export gre_build_header() function. 
This is required for ovs gre module. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/gre.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 40 +--------------------------------------- 2 files changed, 33 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 8b9a373890a..1cbc46536d1 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -61,6 +61,38 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, + int hdr_len) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; + + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; + } + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; + } + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); + } + } +} +EXPORT_SYMBOL_GPL(gre_build_header); + static __sum16 check_checksum(struct sk_buff *skb) { __sum16 csum = 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 19863a81cea..362c7c4c13c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -248,40 +248,6 @@ error: return ERR_PTR(err); } -static struct sk_buff *gre_build_header(struct sk_buff *skb, - const struct tnl_ptk_info *tpi, - int hdr_len) -{ - struct gre_base_hdr *greh; - - skb_push(skb, hdr_len); - - greh = (struct gre_base_hdr *)skb->data; - greh->flags = tnl_flags_to_gre_flags(tpi->flags); - greh->protocol = tpi->proto; - - if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - if (tpi->flags&TUNNEL_SEQ) { - *ptr = tpi->seq; - 
ptr--; - } - if (tpi->flags&TUNNEL_KEY) { - *ptr = tpi->key; - ptr--; - } - if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { - *(__sum16 *)ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, - skb->len, 0)); - } - } - - return skb; -} - static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, __be16 proto) @@ -302,11 +268,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, tpi.seq = htonl(tunnel->o_seqno); /* Push GRE header. */ - skb = gre_build_header(skb, &tpi, tunnel->hlen); - if (unlikely(!skb)) { - dev->stats.tx_dropped++; - return; - } + gre_build_header(skb, &tpi, tunnel->hlen); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } -- cgit v1.2.3 From 45f2e9976cb6fc3f1cc533fd53fe74da5a9dbce4 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:51 -0700 Subject: gre: export gre_handle_offloads() function. This is required for OVS GRE offloading. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/ipv4/gre.c | 29 +++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 34 ++-------------------------------- 2 files changed, 31 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 1cbc46536d1..5ecc9c49b4d 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -93,6 +93,35 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, } EXPORT_SYMBOL_GPL(gre_build_header); +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) +{ + int err; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(gre_handle_offloads); + static __sum16 check_checksum(struct sk_buff *skb) { __sum16 csum = 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 362c7c4c13c..c326e869993 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -223,31 +223,6 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) return PACKET_REJECT; } -static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb) -{ - int err; - - if (skb_is_gso(skb)) { - err = skb_unclone(skb, GFP_ATOMIC); - if (unlikely(err)) - goto error; - skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; - return skb; - } else if (skb->ip_summed == CHECKSUM_PARTIAL && - tunnel->parms.o_flags&TUNNEL_CSUM) { - err = skb_checksum_help(skb); - if (unlikely(err)) - goto error; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_NONE; - - return skb; - -error: - kfree_skb(skb); - return 
ERR_PTR(err); -} - static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, __be16 proto) @@ -255,11 +230,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel *tunnel = netdev_priv(dev); struct tnl_ptk_info tpi; - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - tpi.flags = tunnel->parms.o_flags; tpi.proto = proto; tpi.key = tunnel->parms.o_key; @@ -279,7 +249,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; - skb = handle_offloads(tunnel, skb); + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); if (IS_ERR(skb)) goto out; @@ -318,7 +288,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); - skb = handle_offloads(tunnel, skb); + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); if (IS_ERR(skb)) goto out; -- cgit v1.2.3 From 0e6fbc5b6c6218987c93b8c7ca60cf786062899d Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:49:56 -0700 Subject: ip_tunnels: extend iptunnel_xmit() Refactor various ip tunnels xmit functions and extend iptunnel_xmit() so that there is more code sharing. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/ipv4/Makefile | 2 +- net/ipv4/ip_tunnel.c | 38 +++++--------------- net/ipv4/ip_tunnel_core.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/sit.c | 39 ++++++--------------- 4 files changed, 108 insertions(+), 59 deletions(-) create mode 100644 net/ipv4/ip_tunnel_core.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 7fcf8101d85..86ded0bac9c 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ - inet_fragment.o ping.o + inet_fragment.o ping.o ip_tunnel_core.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e189db409b0..a06a2ed4959 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -491,19 +491,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; - struct iphdr *iph; struct flowi4 fl4; u8 tos, ttl; __be16 df; struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int mtu; + int err; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -571,14 +569,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->stats.tx_carrier_errors++; goto tx_error; } - tdev = rt->dst.dev; - - if (tdev == dev) { + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; goto tx_error; } - df = tnl_params->frag_off; if (df) @@ -596,6 +591,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, if (!skb_is_gso(skb) && 
(inner_iph->frag_off&htons(IP_DF)) && mtu < ntohs(inner_iph->tot_len)) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); goto tx_error; @@ -646,8 +642,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) - + rt->dst.header_len; + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + + rt->dst.header_len; if (max_headroom > dev->needed_headroom) { dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { @@ -657,27 +653,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* Push down and install the IP header. */ - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - - iph = ip_hdr(skb); - inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + err = iptunnel_xmit(dev_net(dev), rt, skb, + fl4.saddr, fl4.daddr, protocol, + ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = protocol; - iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - tunnel_ip_select_ident(skb, inner_iph, &rt->dst); - - iptunnel_xmit(skb, dev); return; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c new file mode 100644 index 00000000000..927687e83f1 --- /dev/null +++ b/net/ipv4/ip_tunnel_core.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int iptunnel_xmit(struct net *net, struct rtable *rt, + struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df) +{ + int pkt_len = skb->len; + struct iphdr *iph; + int err; + + nf_reset(skb); + secpath_reset(skb); + skb->rxhash = 0; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. 
*/ + __skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = proto; + iph->tos = tos; + iph->daddr = dst; + iph->saddr = src; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, + (const struct iphdr *)skb_inner_network_header(skb), + &rt->dst); + + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err))) + pkt_len = 0; + return pkt_len; +} +EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6b9c1f128ea..76bb8de435b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -723,13 +723,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; const struct in6_addr *addr6; int addr_type; + u8 ttl; + int err; if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; @@ -872,34 +873,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb = new_skb; iph6 = ipv6_hdr(skb); } - - skb->transport_header = skb->network_header; - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags = 0; - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. 
- */ - - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr)>>2; - iph->frag_off = df; - iph->protocol = IPPROTO_IPV6; - iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - - if ((iph->ttl = tiph->ttl) == 0) - iph->ttl = iph6->hop_limit; - - skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(iph, skb_dst(skb), NULL); - iptunnel_xmit(skb, dev); + ttl = tiph->ttl; + if (ttl == 0) + ttl = iph6->hop_limit; + tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); + + err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; tx_error_icmp: -- cgit v1.2.3 From 3d7b46cd20e300bd6989fb1f43d46f1b9645816e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:02 -0700 Subject: ip_tunnel: push generic protocol handling to ip_tunnel module. Process skb tunnel header before sending packet to protocol handler. this allows code sharing between gre and ovs gre modules. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/ipv4/gre.c | 3 ++- net/ipv4/ip_tunnel.c | 30 ++++++------------------------ net/ipv4/ip_tunnel_core.c | 34 ++++++++++++++++++++++++++++++++++ net/ipv4/ipip.c | 6 +++++- net/ipv6/sit.c | 7 ++++++- 5 files changed, 53 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 5ecc9c49b4d..ba4803e609b 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -201,7 +201,8 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return 0; + + return iptunnel_pull_header(skb, hdr_len, tpi->proto); } static int gre_cisco_rcv(struct sk_buff *skb) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index a06a2ed4959..bd227e5ea9d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -408,13 +408,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); int err; - secpath_reset(skb); - - skb->protocol = tpi->proto; - - skb->mac_header = skb->network_header; - __pskb_pull(skb, tunnel->hlen); - skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ @@ -442,23 +435,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } - /* Warning: All skb pointers will be invalidated! 
*/ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - skb->pkt_type = PACKET_HOST; - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -477,6 +453,12 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + if (tunnel->dev->type == ARPHRD_ETHER) { + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } else { + skb->dev = tunnel->dev; + } gro_cells_receive(&tunnel->gro_cells, skb); return 0; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 927687e83f1..7167b08977d 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -86,3 +86,37 @@ int iptunnel_xmit(struct net *net, struct rtable *rt, return pkt_len; } EXPORT_SYMBOL_GPL(iptunnel_xmit); + +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) +{ + if (unlikely(!pskb_may_pull(skb, hdr_len))) + return -ENOMEM; + + skb_pull_rcsum(skb, hdr_len); + + if (inner_proto == htons(ETH_P_TEB)) { + struct ethhdr *eh = (struct ethhdr *)skb->data; + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + return -ENOMEM; + + if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) + skb->protocol = eh->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + + } else { + skb->protocol = inner_proto; + } + + nf_reset(skb); + secpath_reset(skb); + if (!skb->l4_rxhash) + skb->rxhash = 0; + skb_dst_drop(skb); + skb->vlan_tci = 0; + skb_set_queue_mapping(skb, 0); + skb->pkt_type = PACKET_HOST; + return 0; +} +EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 
9df7ecd393f..e6905fbda2a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -188,8 +188,12 @@ static int ipip_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); struct ip_tunnel *tunnel; - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 76bb8de435b..6cee844678e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -640,9 +640,14 @@ static const struct tnl_ptk_info tpi = { static int ipip_rcv(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; struct ip_tunnel *tunnel; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + + iph = ip_hdr(skb); + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { -- cgit v1.2.3 From 74f84a5726c7d08c27745305e67474b8645c541d Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:12 -0700 Subject: openvswitch: Copy individual actions. Rather than validating actions and then copying all actiaons in one block, following patch does same operation in single pass. This validate and copy action one by one. This is required for ovs tunneling patch. This patch does not change any functionality. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. 
Miller --- net/openvswitch/datapath.c | 271 ++++++++++++++++++++++++++++++++++++--------- net/openvswitch/flow.c | 10 +- net/openvswitch/flow.h | 2 +- 3 files changed, 225 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0f783d9fa00..f14816b80b8 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -464,16 +464,89 @@ static int flush_flows(struct datapath *dp) return 0; } -static int validate_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth); +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) +{ + + struct sw_flow_actions *acts; + int new_acts_size; + int req_size = NLA_ALIGN(attr_len); + int next_offset = offsetof(struct sw_flow_actions, actions) + + (*sfa)->actions_len; + + if (req_size <= (ksize(*sfa) - next_offset)) + goto out; + + new_acts_size = ksize(*sfa) * 2; + + if (new_acts_size > MAX_ACTIONS_BUFSIZE) { + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + return ERR_PTR(-EMSGSIZE); + new_acts_size = MAX_ACTIONS_BUFSIZE; + } + + acts = ovs_flow_actions_alloc(new_acts_size); + if (IS_ERR(acts)) + return (void *)acts; + + memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); + acts->actions_len = (*sfa)->actions_len; + kfree(*sfa); + *sfa = acts; + +out: + (*sfa)->actions_len += req_size; + return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); +} + +static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) +{ + struct nlattr *a; + + a = reserve_sfa_size(sfa, nla_attr_size(len)); + if (IS_ERR(a)) + return PTR_ERR(a); + + a->nla_type = attrtype; + a->nla_len = nla_attr_size(len); + + if (data) + memcpy(nla_data(a), data, len); + memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); + + return 0; +} + +static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype) +{ + int used = (*sfa)->actions_len; + int err; + + err = 
add_action(sfa, attrtype, NULL, 0); + if (err) + return err; + + return used; +} -static int validate_sample(const struct nlattr *attr, - const struct sw_flow_key *key, int depth) +static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) +{ + struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); + + a->nla_len = sfa->actions_len - st_offset; +} + +static int validate_and_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa); + +static int validate_and_copy_sample(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; const struct nlattr *a; - int rem; + int rem, start, err, st_acts; memset(attrs, 0, sizeof(attrs)); nla_for_each_nested(a, attr, rem) { @@ -492,7 +565,26 @@ static int validate_sample(const struct nlattr *attr, actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) return -EINVAL; - return validate_actions(actions, key, depth + 1); + + /* validation done, copy sample action. 
*/ + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + if (start < 0) + return start; + err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); + if (err) + return err; + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + if (st_acts < 0) + return st_acts; + + err = validate_and_copy_actions(actions, key, depth + 1, sfa); + if (err) + return err; + + add_nested_action_end(*sfa, st_acts); + add_nested_action_end(*sfa, start); + + return 0; } static int validate_tp_port(const struct sw_flow_key *flow_key) @@ -606,8 +698,24 @@ static int validate_userspace(const struct nlattr *attr) return 0; } -static int validate_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth) +static int copy_action(const struct nlattr *from, + struct sw_flow_actions **sfa) +{ + int totlen = NLA_ALIGN(from->nla_len); + struct nlattr *to; + + to = reserve_sfa_size(sfa, from->nla_len); + if (IS_ERR(to)) + return PTR_ERR(to); + + memcpy(to, from, totlen); + return 0; +} + +static int validate_and_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, + struct sw_flow_actions **sfa) { const struct nlattr *a; int rem, err; @@ -627,12 +735,14 @@ static int validate_actions(const struct nlattr *attr, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); + bool skip_copy; if (type > OVS_ACTION_ATTR_MAX || (action_lens[type] != nla_len(a) && action_lens[type] != (u32)-1)) return -EINVAL; + skip_copy = false; switch (type) { case OVS_ACTION_ATTR_UNSPEC: return -EINVAL; @@ -667,14 +777,20 @@ static int validate_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_sample(a, key, depth); + err = validate_and_copy_sample(a, key, depth, sfa); if (err) return err; + skip_copy = true; break; default: return -EINVAL; } + if (!skip_copy) { + err = copy_action(a, sfa); + if (err) + return err; + } } if (rem > 0) @@ -742,18 +858,16 @@ static int 
ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; - - err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0); - if (err) - goto err_flow_free; - flow->hash = ovs_flow_hash(&flow->key, key_len); - - acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); + acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) goto err_flow_free; + + err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); rcu_assign_pointer(flow->sf_acts, acts); + if (err) + goto err_flow_free; OVS_CB(packet)->flow = flow; packet->priority = flow->key.phy.priority; @@ -843,6 +957,66 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; +static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); +static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +{ + const struct nlattr *a; + struct nlattr *start; + int err = 0, rem; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); + if (!start) + return -EMSGSIZE; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + struct nlattr *st_sample; + + switch (type) { + case OVS_SAMPLE_ATTR_PROBABILITY: + if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) + return -EMSGSIZE; + break; + case OVS_SAMPLE_ATTR_ACTIONS: + st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!st_sample) + return -EMSGSIZE; + err = actions_to_attr(nla_data(a), nla_len(a), skb); + if (err) + return err; + nla_nest_end(skb, st_sample); + break; + } + } + + nla_nest_end(skb, start); + return err; +} + +static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) +{ + const struct nlattr *a; + int rem, err; + + nla_for_each_attr(a, attr, len, rem) { + int type = nla_type(a); + + switch (type) { + case 
OVS_ACTION_ATTR_SAMPLE: + err = sample_action_to_attr(a, skb); + if (err) + return err; + break; + default: + if (nla_put(skb, type, nla_len(a), nla_data(a))) + return -EMSGSIZE; + break; + } + } + + return 0; +} + static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -860,6 +1034,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, { const int skb_orig_len = skb->len; const struct sw_flow_actions *sf_acts; + struct nlattr *start; struct ovs_flow_stats stats; struct ovs_header *ovs_header; struct nlattr *nla; @@ -913,10 +1088,19 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, * This can only fail for dump operations because the skb is always * properly sized for single flows. */ - err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, - sf_acts->actions); - if (err < 0 && skb_orig_len) - goto error; + start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); + if (start) { + err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); + if (!err) + nla_nest_end(skb, start); + else { + if (skb_orig_len) + goto error; + + nla_nest_cancel(skb, start); + } + } else if (skb_orig_len) + goto nla_put_failure; return genlmsg_end(skb, ovs_header); @@ -961,6 +1145,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct datapath *dp; struct flow_table *table; + struct sw_flow_actions *acts = NULL; int error; int key_len; @@ -974,9 +1159,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Validate actions. 
*/ if (a[OVS_FLOW_ATTR_ACTIONS]) { - error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); - if (error) + acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) goto error; + + error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts); + if (error) + goto err_kfree; } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { error = -EINVAL; goto error; @@ -991,8 +1181,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); if (!flow) { - struct sw_flow_actions *acts; - /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) @@ -1019,11 +1207,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->key = key; clear_stats(flow); - /* Obtain actions. */ - acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); - error = PTR_ERR(acts); - if (IS_ERR(acts)) - goto error_free_flow; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ @@ -1036,7 +1219,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts; - struct nlattr *acts_attrs; /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL @@ -1051,21 +1233,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Update actions. 
*/ old_acts = ovsl_dereference(flow->sf_acts); - acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; - if (acts_attrs && - (old_acts->actions_len != nla_len(acts_attrs) || - memcmp(old_acts->actions, nla_data(acts_attrs), - old_acts->actions_len))) { - struct sw_flow_actions *new_acts; - - new_acts = ovs_flow_actions_alloc(acts_attrs); - error = PTR_ERR(new_acts); - if (IS_ERR(new_acts)) - goto err_unlock_ovs; - - rcu_assign_pointer(flow->sf_acts, new_acts); - ovs_flow_deferred_free_acts(old_acts); - } + rcu_assign_pointer(flow->sf_acts, acts); + ovs_flow_deferred_free_acts(old_acts); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1086,10 +1255,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); return 0; -error_free_flow: - ovs_flow_free(flow); err_unlock_ovs: ovs_unlock(); +err_kfree: + kfree(acts); error: return error; } @@ -1866,8 +2035,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_DEL); + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, + info->snd_seq, OVS_VPORT_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) goto exit_unlock; @@ -1896,8 +2065,8 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, + info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) goto exit_unlock; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 093c191d4fc..940d4b803ff 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -198,20 +198,18 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) spin_unlock(&flow->lock); } -struct 
sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) +struct sw_flow_actions *ovs_flow_actions_alloc(int size) { - int actions_len = nla_len(actions); struct sw_flow_actions *sfa; - if (actions_len > MAX_ACTIONS_BUFSIZE) + if (size > MAX_ACTIONS_BUFSIZE) return ERR_PTR(-EINVAL); - sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); + sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); - sfa->actions_len = actions_len; - nla_memcpy(sfa->actions, actions, actions_len); + sfa->actions_len = 0; return sfa; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 2a83e2141f0..e370f6246ee 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -130,7 +130,7 @@ struct sw_flow *ovs_flow_alloc(void); void ovs_flow_deferred_free(struct sw_flow *); void ovs_flow_free(struct sw_flow *flow); -struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *); +struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); void ovs_flow_deferred_free_acts(struct sw_flow_actions *); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, -- cgit v1.2.3 From 7d5437c709ded4f152cb8b305d17972d6707f20c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:18 -0700 Subject: openvswitch: Add tunneling interface. Add ovs tunnel interface for set tunnel action for userspace. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. 
Miller --- net/openvswitch/actions.c | 4 ++ net/openvswitch/datapath.c | 78 +++++++++++++++++++++- net/openvswitch/datapath.h | 3 + net/openvswitch/flow.c | 125 +++++++++++++++++++++++++++++++++++ net/openvswitch/flow.h | 19 ++++++ net/openvswitch/vport-internal_dev.c | 2 +- net/openvswitch/vport-netdev.c | 2 +- net/openvswitch/vport.c | 4 +- net/openvswitch/vport.h | 3 +- 9 files changed, 233 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 596d6373399..22c5f399f1c 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -436,6 +436,10 @@ static int execute_set_action(struct sk_buff *skb, skb->mark = nla_get_u32(nested_attr); break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + OVS_CB(skb)->tun_key = nla_data(nested_attr); + break; + case OVS_KEY_ATTR_ETHERNET: err = set_eth_addr(skb, nla_data(nested_attr)); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f14816b80b8..bbd310646bc 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -362,6 +362,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, static size_t key_attr_size(void) { return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ + + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ @@ -600,8 +608,30 @@ static int validate_tp_port(const struct sw_flow_key *flow_key) return -EINVAL; } +static int validate_and_copy_set_tun(const struct nlattr *attr, + 
struct sw_flow_actions **sfa) +{ + struct ovs_key_ipv4_tunnel tun_key; + int err, start; + + err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key); + if (err) + return err; + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + if (start < 0) + return start; + + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key)); + add_nested_action_end(*sfa, start); + + return err; +} + static int validate_set(const struct nlattr *a, - const struct sw_flow_key *flow_key) + const struct sw_flow_key *flow_key, + struct sw_flow_actions **sfa, + bool *set_tun) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -611,18 +641,27 @@ static int validate_set(const struct nlattr *a, return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || - nla_len(ovs_key) != ovs_key_lens[key_type]) + (ovs_key_lens[key_type] != nla_len(ovs_key) && + ovs_key_lens[key_type] != -1)) return -EINVAL; switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; + int err; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_TUNNEL: + *set_tun = true; + err = validate_and_copy_set_tun(a, sfa); + if (err) + return err; + break; + case OVS_KEY_ATTR_IPV4: if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; @@ -771,7 +810,7 @@ static int validate_and_copy_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SET: - err = validate_set(a, key); + err = validate_set(a, key, sfa, &skip_copy); if (err) return err; break; @@ -993,6 +1032,33 @@ static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) return err; } +static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + struct nlattr *start; + int err; + + switch (key_type) { + case OVS_KEY_ATTR_IPV4_TUNNEL: + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!start) + 
return -EMSGSIZE; + + err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key)); + if (err) + return err; + nla_nest_end(skb, start); + break; + default: + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) + return -EMSGSIZE; + break; + } + + return 0; +} + static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -1002,6 +1068,12 @@ static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *s int type = nla_type(a); switch (type) { + case OVS_ACTION_ATTR_SET: + err = set_action_to_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 16b84069521..e88ebc2f1c5 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,9 +88,12 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the + * packet is not being tunneled. 
*/ struct ovs_skb_cb { struct sw_flow *flow; + struct ovs_key_ipv4_tunnel *tun_key; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 940d4b803ff..976a8b766a6 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -603,6 +604,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memset(key, 0, sizeof(*key)); key->phy.priority = skb->priority; + if (OVS_CB(skb)->tun_key) + memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); key->phy.in_port = in_port; key->phy.skb_mark = skb->mark; @@ -818,6 +821,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), + [OVS_KEY_ATTR_TUNNEL] = -1, }; static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, @@ -955,6 +959,105 @@ static int parse_flow_nlattrs(const struct nlattr *attr, return 0; } +int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key) +{ + struct nlattr *a; + int rem; + bool ttl = false; + + memset(tun_key, 0, sizeof(*tun_key)); + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_TOS] = 1, + [OVS_TUNNEL_KEY_ATTR_TTL] = 1, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, + [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + }; + + if (type > OVS_TUNNEL_KEY_ATTR_MAX || + ovs_tunnel_key_lens[type] != nla_len(a)) + return -EINVAL; + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + tun_key->tun_id = nla_get_be64(a); + tun_key->tun_flags |= TUNNEL_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 
+ tun_key->ipv4_src = nla_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + tun_key->ipv4_dst = nla_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + tun_key->ipv4_tos = nla_get_u8(a); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + tun_key->ipv4_ttl = nla_get_u8(a); + ttl = true; + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tun_key->tun_flags |= TUNNEL_CSUM; + break; + default: + return -EINVAL; + + } + } + if (rem > 0) + return -EINVAL; + + if (!tun_key->ipv4_dst) + return -EINVAL; + + if (!ttl) + return -EINVAL; + + return 0; +} + +int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key) +{ + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + if (tun_key->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) + return -EMSGSIZE; + if (tun_key->ipv4_src && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) + return -EMSGSIZE; + if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) + return -EMSGSIZE; + if (tun_key->ipv4_tos && + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + + nla_nest_end(skb, nla); + return 0; +} + /** * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. * @swkey: receives the extracted flow key. 
@@ -997,6 +1100,14 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); } + if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { + err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key); + if (err) + return err; + + attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); + } + /* Data attributes. */ if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) return -EINVAL; @@ -1135,17 +1246,21 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, const struct nlattr *attr) { + struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; const struct nlattr *nla; int rem; flow->key.phy.in_port = DP_MAX_PORTS; flow->key.phy.priority = 0; flow->key.phy.skb_mark = 0; + memset(tun_key, 0, sizeof(flow->key.tun_key)); nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { + int err; + if (nla_len(nla) != ovs_key_lens[type]) return -EINVAL; @@ -1154,6 +1269,12 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, flow->key.phy.priority = nla_get_u32(nla); break; + case OVS_KEY_ATTR_TUNNEL: + err = ovs_ipv4_tun_from_nlattr(nla, tun_key); + if (err) + return err; + break; + case OVS_KEY_ATTR_IN_PORT: if (nla_get_u32(nla) >= DP_MAX_PORTS) return -EINVAL; @@ -1180,6 +1301,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; + if (swkey->tun_key.ipv4_dst && + ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) + goto nla_put_failure; + if (swkey->phy.in_port != DP_MAX_PORTS && nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index e370f6246ee..aec5e43f690 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -40,7 +40,22 @@ struct sw_flow_actions { struct nlattr actions[]; }; +/* Used to 
memset ovs_key_ipv4_tunnel padding. */ +#define OVS_TUNNEL_KEY_SIZE \ + (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ + FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) + +struct ovs_key_ipv4_tunnel { + __be64 tun_id; + __be32 ipv4_src; + __be32 ipv4_dst; + u16 tun_flags; + u8 ipv4_tos; + u8 ipv4_ttl; +}; + struct sw_flow_key { + struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ @@ -179,5 +194,9 @@ u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len); struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; +int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key); +int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key); #endif /* flow.h */ diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index e284c7e1fec..98d3edbbc23 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -67,7 +67,7 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { rcu_read_lock(); - ovs_vport_receive(internal_dev_priv(netdev)->vport, skb); + ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL); rcu_read_unlock(); return 0; } diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 40de815b421..5982f3f6283 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -51,7 +51,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) skb_push(skb, ETH_HLEN); ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); - ovs_vport_receive(vport, skb); + ovs_vport_receive(vport, skb, NULL); return; error: diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 
176d449351e..413287a1877 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -325,7 +325,8 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. */ -void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) +void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, + struct ovs_key_ipv4_tunnel *tun_key) { struct pcpu_tstats *stats; @@ -335,6 +336,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) stats->rx_bytes += skb->len; u64_stats_update_end(&stats->syncp); + OVS_CB(skb)->tun_key = tun_key; ovs_dp_process_received_packet(vport, skb); } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 293278c4c2d..2d961aedd71 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -184,7 +184,8 @@ static inline struct vport *vport_from_priv(const void *priv) return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); } -void ovs_vport_receive(struct vport *, struct sk_buff *); +void ovs_vport_receive(struct vport *, struct sk_buff *, + struct ovs_key_ipv4_tunnel *); void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); /* List of statically compiled vport implementations. Don't forget to also -- cgit v1.2.3 From ffe3f4321745e743dd179ec2b12180c01ba0d3aa Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:23 -0700 Subject: openvswitch: Expand action buffer size. MAX_ACTIONS_BUFSIZE limits action list size, set tunnel action needs extra space on action list, for now increase max actions list limit. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. 
Miller --- net/openvswitch/flow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index aec5e43f690..bfe80b96075 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -159,7 +159,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, const struct nlattr *attr); -#define MAX_ACTIONS_BUFSIZE (16 * 1024) +#define MAX_ACTIONS_BUFSIZE (32 * 1024) #define TBL_MIN_BUCKETS 1024 struct flow_table { -- cgit v1.2.3 From a3e82996a8874c4cfe8c7f1be4d552018d8cba7e Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:28 -0700 Subject: openvswitch: Optimize flow key match for non tunnel flows. Following patch adds start offset for sw_flow-key, so that we can skip tunneling information in key for non-tunnel flows. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 7 ++----- net/openvswitch/flow.c | 49 ++++++++++++++++++++++++++++++++++------------ net/openvswitch/flow.h | 6 +++--- 3 files changed, 42 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index bbd310646bc..f7e3a0d84c4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -894,10 +894,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); + err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; - flow->hash = ovs_flow_hash(&flow->key, key_len); acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) @@ -1276,14 +1275,12 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) error = PTR_ERR(flow); goto err_unlock_ovs; } - 
flow->key = key; clear_stats(flow); rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - flow->hash = ovs_flow_hash(&key, key_len); - ovs_flow_tbl_insert(table, flow); + ovs_flow_tbl_insert(table, flow, &key, key_len); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 976a8b766a6..5c519b121e1 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -353,6 +353,14 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la return NULL; } +static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct hlist_head *head; + head = find_bucket(table, flow->hash); + hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); + table->count++; +} + static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) { int old_ver; @@ -369,7 +377,7 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new head = flex_array_get(old->buckets, i); hlist_for_each_entry(flow, head, hash_node[old_ver]) - ovs_flow_tbl_insert(new, flow); + __flow_tbl_insert(new, flow); } old->keep_flows = true; } @@ -763,9 +771,18 @@ out: return error; } -u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len) +static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) +{ + return jhash2((u32 *)((u8 *)key + key_start), + DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); +} + +static int flow_key_start(struct sw_flow_key *key) { - return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0); + if (key->tun_key.ipv4_dst) + return 0; + else + return offsetof(struct sw_flow_key, phy); } struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, @@ -773,28 +790,31 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, { struct sw_flow *flow; struct hlist_head *head; + u8 *_key; + int key_start; u32 hash; - hash = ovs_flow_hash(key, key_len); + 
key_start = flow_key_start(key); + hash = ovs_flow_hash(key, key_start, key_len); + _key = (u8 *) key + key_start; head = find_bucket(table, hash); hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { if (flow->hash == hash && - !memcmp(&flow->key, key, key_len)) { + !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) { return flow; } } return NULL; } -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_key *key, int key_len) { - struct hlist_head *head; - - head = find_bucket(table, flow->hash); - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); - table->count++; + flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len); + memcpy(&flow->key, key, sizeof(flow->key)); + __flow_tbl_insert(table, flow); } void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) @@ -1235,6 +1255,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, /** * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @key_len: Length of key in @flow. Used for calculating flow hash. * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. * @@ -1243,7 +1264,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. 
*/ -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const struct nlattr *attr) { struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; @@ -1289,6 +1310,10 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, } if (rem) return -EINVAL; + + flow->hash = ovs_flow_hash(&flow->key, + flow_key_start(&flow->key), key_len); + return 0; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index bfe80b96075..999842f247a 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -156,7 +156,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const struct nlattr *attr); #define MAX_ACTIONS_BUFSIZE (32 * 1024) @@ -188,9 +188,9 @@ void ovs_flow_tbl_deferred_destroy(struct flow_table *table); struct flow_table *ovs_flow_tbl_alloc(int new_size); struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_key *key, int key_len); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); -u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len); struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; -- cgit v1.2.3 From aa310701e787087dbfbccf1409982a96e16c57a6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 17 Jun 2013 17:50:33 -0700 Subject: openvswitch: Add gre tunnel support. Add gre vport implementation. 
Most of gre protocol processing is pushed to gre module. It makes use of the gre demultiplexer, therefore it can co-exist with linux device based gre tunnels. Signed-off-by: Pravin B Shelar Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/Kconfig | 2 + net/openvswitch/Makefile | 3 +- net/openvswitch/datapath.h | 1 + net/openvswitch/flow.h | 18 ++- net/openvswitch/vport-gre.c | 274 ++++++++++++++++++++++++++++++++++++++++++++ net/openvswitch/vport.c | 19 +++ net/openvswitch/vport.h | 7 ++ 7 files changed, 322 insertions(+), 2 deletions(-) create mode 100644 net/openvswitch/vport-gre.c (limited to 'net') diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index d9ea33c361b..9fbc04a31ed 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -19,6 +19,8 @@ config OPENVSWITCH which is able to accept configuration from a variety of sources and translate it into packet processing rules. + Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. + See http://openvswitch.org for more information and userspace utilities. 
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 15e7384745c..01bddb2991e 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -10,5 +10,6 @@ openvswitch-y := \ dp_notify.o \ flow.o \ vport.o \ + vport-gre.o \ vport-internal_dev.o \ - vport-netdev.o \ + vport-netdev.o diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index e88ebc2f1c5..a9148648491 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -122,6 +122,7 @@ struct dp_upcall_info { struct ovs_net { struct list_head dps; struct work_struct dp_notify_work; + struct vport_net vport_net; }; extern int ovs_net_id; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 999842f247a..66ef7220293 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -49,11 +49,27 @@ struct ovs_key_ipv4_tunnel { __be64 tun_id; __be32 ipv4_src; __be32 ipv4_dst; - u16 tun_flags; + __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; }; +static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, + const struct iphdr *iph, __be64 tun_id, + __be16 tun_flags) +{ + tun_key->tun_id = tun_id; + tun_key->ipv4_src = iph->saddr; + tun_key->ipv4_dst = iph->daddr; + tun_key->ipv4_tos = iph->tos; + tun_key->ipv4_ttl = iph->ttl; + tun_key->tun_flags = tun_flags; + + /* clear struct padding. */ + memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0, + sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE); +} + struct sw_flow_key { struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c new file mode 100644 index 00000000000..3a8d1900aa7 --- /dev/null +++ b/net/openvswitch/vport-gre.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifdef CONFIG_NET_IPGRE_DEMUX +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "vport.h" + +/* Returns the least-significant 32 bits of a __be64. */ +static __be32 be64_get_low32(__be64 x) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)x; +#else + return (__force __be32)((__force u64)x >> 32); +#endif +} + +static __be16 filter_tnl_flags(__be16 flags) +{ + return flags & (TUNNEL_CSUM | TUNNEL_KEY); +} + +static struct sk_buff *__build_header(struct sk_buff *skb, + int tunnel_hlen) +{ + const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; + struct tnl_ptk_info tpi; + + skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); + if (IS_ERR(skb)) + return NULL; + + tpi.flags = filter_tnl_flags(tun_key->tun_flags); + tpi.proto = htons(ETH_P_TEB); + tpi.key = be64_get_low32(tun_key->tun_id); + tpi.seq = 0; + gre_build_header(skb, &tpi, tunnel_hlen); + + return skb; +} + +static __be64 key_to_tunnel_id(__be32 key, __be32 seq) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)((__force u64)seq << 32 | (__force u32)key); +#else + return (__force __be64)((__force u64)key << 32 | (__force u32)seq); +#endif +} + +/* Called with rcu_read_lock and BH disabled. 
*/ +static int gre_rcv(struct sk_buff *skb, + const struct tnl_ptk_info *tpi) +{ + struct ovs_key_ipv4_tunnel tun_key; + struct ovs_net *ovs_net; + struct vport *vport; + __be64 key; + + ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); + vport = rcu_dereference(ovs_net->vport_net.gre_vport); + if (unlikely(!vport)) + return PACKET_REJECT; + + key = key_to_tunnel_id(tpi->key, tpi->seq); + ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, + filter_tnl_flags(tpi->flags)); + + ovs_vport_receive(vport, skb, &tun_key); + return PACKET_RCVD; +} + +static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct flowi4 fl; + struct rtable *rt; + int min_headroom; + int tunnel_hlen; + __be16 df; + int err; + + if (unlikely(!OVS_CB(skb)->tun_key)) { + err = -EINVAL; + goto error; + } + + /* Route lookup */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst; + fl.saddr = OVS_CB(skb)->tun_key->ipv4_src; + fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos); + fl.flowi4_mark = skb->mark; + fl.flowi4_proto = IPPROTO_GRE; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags); + + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + tunnel_hlen + sizeof(struct iphdr) + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + if (vlan_tx_tag_present(skb)) { + if (unlikely(!__vlan_put_tag(skb, + skb->vlan_proto, + vlan_tx_tag_get(skb)))) { + err = -ENOMEM; + goto err_free_rt; + } + skb->vlan_tci = 0; + } + + /* Push Tunnel header. 
*/ + skb = __build_header(skb, tunnel_hlen); + if (unlikely(!skb)) { + err = 0; + goto err_free_rt; + } + + df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? + htons(IP_DF) : 0; + + skb->local_df = 1; + + return iptunnel_xmit(net, rt, skb, fl.saddr, + OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, + OVS_CB(skb)->tun_key->ipv4_tos, + OVS_CB(skb)->tun_key->ipv4_ttl, df); +err_free_rt: + ip_rt_put(rt); +error: + return err; +} + +static struct gre_cisco_protocol gre_protocol = { + .handler = gre_rcv, + .priority = 1, +}; + +static int gre_ports; +static int gre_init(void) +{ + int err; + + gre_ports++; + if (gre_ports > 1) + return 0; + + err = gre_cisco_register(&gre_protocol); + if (err) + pr_warn("cannot register gre protocol handler\n"); + + return err; +} + +static void gre_exit(void) +{ + gre_ports--; + if (gre_ports > 0) + return; + + gre_cisco_unregister(&gre_protocol); +} + +static const char *gre_get_name(const struct vport *vport) +{ + return vport_priv(vport); +} + +static struct vport *gre_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct ovs_net *ovs_net; + struct vport *vport; + int err; + + err = gre_init(); + if (err) + return ERR_PTR(err); + + ovs_net = net_generic(net, ovs_net_id); + if (ovsl_dereference(ovs_net->vport_net.gre_vport)) { + vport = ERR_PTR(-EEXIST); + goto error; + } + + vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); + if (IS_ERR(vport)) + goto error; + + strncpy(vport_priv(vport), parms->name, IFNAMSIZ); + rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); + return vport; + +error: + gre_exit(); + return vport; +} + +static void gre_tnl_destroy(struct vport *vport) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct ovs_net *ovs_net; + + ovs_net = net_generic(net, ovs_net_id); + + rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); + ovs_vport_deferred_free(vport); + gre_exit(); +} + +const struct vport_ops ovs_gre_vport_ops = { + .type = 
OVS_VPORT_TYPE_GRE, + .create = gre_create, + .destroy = gre_tnl_destroy, + .get_name = gre_get_name, + .send = gre_tnl_send, +}; +#endif diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 413287a1877..f52dfb9cb5a 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -38,6 +38,10 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, + +#ifdef CONFIG_NET_IPGRE_DEMUX + &ovs_gre_vport_ops, +#endif }; /* Protected by RCU read lock for reading, ovs_mutex for writing. */ @@ -404,3 +408,18 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) spin_unlock(&vport->stats_lock); } + +static void free_vport_rcu(struct rcu_head *rcu) +{ + struct vport *vport = container_of(rcu, struct vport, rcu); + + ovs_vport_free(vport); +} + +void ovs_vport_deferred_free(struct vport *vport) +{ + if (!vport) + return; + + call_rcu(&vport->rcu, free_vport_rcu); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 2d961aedd71..376045c42f8 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -34,6 +34,11 @@ struct vport_parms; /* The following definitions are for users of the vport subsytem: */ +/* The following definitions are for users of the vport subsytem: */ +struct vport_net { + struct vport __rcu *gre_vport; +}; + int ovs_vport_init(void); void ovs_vport_exit(void); @@ -152,6 +157,7 @@ enum vport_err_type { struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, const struct vport_parms *); void ovs_vport_free(struct vport *); +void ovs_vport_deferred_free(struct vport *vport); #define VPORT_ALIGN 8 @@ -192,6 +198,7 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); * add yours to the list at the top of vport.c. 
*/ extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; +extern const struct vport_ops ovs_gre_vport_ops; static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) -- cgit v1.2.3 From cf89d6b2803ab99ac596f95d585c3057d2be645c Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 20 Jun 2013 10:01:32 +0800 Subject: neigh: no need to call lookup_neigh_parms in neigh_parms_alloc neigh_table.parms always exist and is initialized,kmemdup can use it to create new neigh_parms, actually lookup_neigh_parms here will return neigh_table.parms too. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/core/neighbour.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index decaa4b9db2..53eab513955 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1429,15 +1429,11 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p, *ref; + struct neigh_parms *p; struct net *net = dev_net(dev); const struct net_device_ops *ops = dev->netdev_ops; - ref = lookup_neigh_parms(tbl, net, 0); - if (!ref) - return NULL; - - p = kmemdup(ref, sizeof(*p), GFP_KERNEL); + p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); -- cgit v1.2.3 From 170d6f99541600ec7512f1d2b0b0c349009098d2 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 20 Jun 2013 10:01:33 +0800 Subject: neigh: only allow init_net to change the default neigh_parms Though we don't export the /proc/sys/net/ipv[4,6]/neigh/default/ directory to the un-init_net, but we can still use cmd such as "ip ntable change name arp_cache locktime 129" to change the locktime of default neigh_parms. This patch disallows the un-init_net to find out the neigh_table.parms. 
So the un-init_net will fail to influence the init_net. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 53eab513955..86f9b165bbb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1419,7 +1419,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, for (p = &tbl->parms; p; p = p->next) { if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || - (!p->dev && !ifindex)) + (!p->dev && !ifindex && net_eq(net, &init_net))) return p; } -- cgit v1.2.3 From dc25c676f54addb10e598daa9da9b8dd4fd487ab Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 20 Jun 2013 10:01:34 +0800 Subject: neigh: disallow un-init_net to change thresh of neigh thresh and interval are global resources, only init net can change them. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 86f9b165bbb..2569ab2cafb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2049,6 +2049,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) } } + err = -ENOENT; + if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || + tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && + !net_eq(net, &init_net)) + goto errout_tbl_lock; + if (tb[NDTA_THRESH1]) tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); -- cgit v1.2.3 From c2ff682a6f5c5ae2cb23b32bb4fd7a6fb059d4fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Jun 2013 12:03:13 +0200 Subject: sit: fix an oops when IFLA_IPTUN_PROTO is not set The use of this attribute has been added in 32b8a8e59c9c (sit: add IPv4 over IPv4 support). It is optional, by default proto is IPPROTO_IPV6. Signed-off-by: Nicolas Dichtel Signed-off-by: David S.
Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6cee844678e..f639866b3dc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1296,7 +1296,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) { u8 proto; - if (!data) + if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); -- cgit v1.2.3 From 9ef71e0c820987c899e454e2e7ef94bc2d4c8d04 Mon Sep 17 00:00:00 2001 From: Weiping Pan Date: Tue, 18 Jun 2013 21:00:31 +0800 Subject: tcp:typo unset should be unsent Signed-off-by: Weiping Pan Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3dd46eab3b0..e2c1333ee31 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -185,7 +185,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) u32 tcp_default_init_rwnd(u32 mss) { /* Initial receive window should be twice of TCP_INIT_CWND to - * enable proper sending of new unset data during fast recovery + * enable proper sending of new unsent data during fast recovery * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a * limit when mss is larger than 1460. */ -- cgit v1.2.3 From 2c0740e4e122239bcf6127fd2063733c5fb20c93 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 17 Jun 2013 22:26:52 -0400 Subject: sctp: Convert __list_for_each use to list_for_each Signed-off-by: Dave Jones Signed-off-by: David S. 
Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 57b568c38ef..1de49c802d8 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -178,7 +178,7 @@ static void sctp_get_local_addr_list(struct net *net) rcu_read_lock(); for_each_netdev_rcu(net, dev) { - __list_for_each(pos, &sctp_address_families) { + list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&net->sctp.local_addr_list, dev); } -- cgit v1.2.3 From bcefe17cffd06efdda3e7ad679ea743236e6271a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 15 Jun 2013 09:39:18 +0800 Subject: tcp: introduce a per-route knob for quick ack In previous discussions, I tried to find some reasonable heuristics for delayed ACK, however this seems not possible, according to Eric: "ACKS might also be delayed because of bidirectional traffic, and is more controlled by the application response time. TCP stack can not easily estimate it." "ACK can be incredibly useful to recover from losses in a short time. The vast majority of TCP sessions are small lived, and we send one ACK per received segment anyway at beginning or retransmits to let the sender smoothly increase its cwnd, so an auto-tuning facility wont help them that much." and according to David: "ACKs are the only information we have to detect loss. And, for the same reasons that TCP VEGAS is fundamentally broken, we cannot measure the pipe or some other receiver-side-visible piece of information to determine when it's "safe" to stretch ACK. And even if it's "safe", we should not do it so that losses are accurately detected and we don't spuriously retransmit. The only way to know when the bandwidth increases is to "test" it, by sending more and more packets until drops happen. That's why all successful congestion control algorithms must operate on explicited tested pieces of information. 
Similarly, it's not really possible to universally know if it's safe to stretch ACK or not." It still makes sense to enable or disable quick ack mode like what TCP_QUICK_ACK does. Similar to TCP_QUICK_ACK option, but for people who can't modify the source code and still wants to control TCP delayed ACK behavior. As David suggested, this should belong to per-path scope, since different pathes may want different behaviors. Cc: Eric Dumazet Cc: Rick Jones Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf CC: David Laight Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++++- net/ipv4/tcp_output.c | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 46271cdcf08..28af45abe06 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3717,6 +3717,7 @@ void tcp_reset(struct sock *sk) static void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst; inet_csk_schedule_ack(sk); @@ -3728,7 +3729,9 @@ static void tcp_fin(struct sock *sk) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - inet_csk(sk)->icsk_ack.pingpong = 1; + dst = __sk_dst_get(sk); + if (!dst || !dst_metric(dst, RTAX_QUICKACK)) + inet_csk(sk)->icsk_ack.pingpong = 1; break; case TCP_CLOSE_WAIT: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2c1333ee31..3d609490f11 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -160,6 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; + const struct dst_entry *dst = __sk_dst_get(sk); if (sysctl_tcp_slow_start_after_idle && (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) @@ -170,8 +171,9 @@ static void tcp_event_data_sent(struct tcp_sock *tp, /* If it is a reply for ato after last received * packet, enter pingpong mode. 
*/ - if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - icsk->icsk_ack.pingpong = 1; + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato && + (!dst || !dst_metric(dst, RTAX_QUICKACK))) + icsk->icsk_ack.pingpong = 1; } /* Account for an ACK we sent. */ -- cgit v1.2.3 From c9364636dcb01a6fc37ca2c6a51c5aa0c663013c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 15 Jun 2013 03:30:10 -0700 Subject: htb: refactor struct htb_sched fields for performance htb_sched structures are big, and source of false sharing on SMP. Every time a packet is queued or dequeue, many cache lines must be touched because structures are not lay out properly. By carefully splitting htb_sched in two parts, and define sub structures to increase data locality, we can improve performance dramatically on SMP. New htb_prio structure can also be used in htb_class to increase data locality. I got 26 % performance increase on a 24 threads machine, with 200 concurrent netperf in TCP_RR mode, using a HTB hierarchy of 4 classes. Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 181 +++++++++++++++++++++++++++------------------------- 1 file changed, 95 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 7954e73d118..c2124ea29f4 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -76,6 +76,20 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; +struct htb_prio { + union { + struct rb_root row; + struct rb_root feed; + }; + struct rb_node *ptr; + /* When class changes from state 1->2 and disconnects from + * parent's feed then we lost ptr value and start from the + * first child again. Here we store classid of the + * last valid ptr (used when ptr is NULL). + */ + u32 last_ptr_id; +}; + /* interior & leaf nodes; props specific to leaves are marked L: * To reduce false sharing, place mostly read fields at beginning, * and mostly written ones at the end. 
@@ -112,19 +126,12 @@ struct htb_class { union { struct htb_class_leaf { - struct Qdisc *q; - int deficit[TC_HTB_MAXDEPTH]; struct list_head drop_list; + int deficit[TC_HTB_MAXDEPTH]; + struct Qdisc *q; } leaf; struct htb_class_inner { - struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ - struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ - /* When class changes from state 1->2 and disconnects from - * parent's feed then we lost ptr value and start from the - * first child again. Here we store classid of the - * last valid ptr (used when ptr is NULL). - */ - u32 last_ptr_id[TC_HTB_NUMPRIO]; + struct htb_prio clprio[TC_HTB_NUMPRIO]; } inner; } un; s64 pq_key; @@ -135,40 +142,39 @@ struct htb_class { struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ }; +struct htb_level { + struct rb_root wait_pq; + struct htb_prio hprio[TC_HTB_NUMPRIO]; +}; + struct htb_sched { struct Qdisc_class_hash clhash; - struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ - - /* self list - roots of self generating tree */ - struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - int row_mask[TC_HTB_MAXDEPTH]; - struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + int defcls; /* class where unclassified flows go to */ + int rate2quantum; /* quant = rate / rate2quantum */ - /* self wait list - roots of wait PQs per row */ - struct rb_root wait_pq[TC_HTB_MAXDEPTH]; + /* filters for qdisc itself */ + struct tcf_proto *filter_list; - /* time of nearest event per level (row) */ - s64 near_ev_cache[TC_HTB_MAXDEPTH]; +#define HTB_WARN_TOOMANYEVENTS 0x1 + unsigned int warned; /* only one warning */ + int direct_qlen; + struct work_struct work; - int defcls; /* class where unclassified flows go to */ + /* non shaped skbs; let them go directly thru */ + struct sk_buff_head direct_queue; + long direct_pkts; - /* filters for qdisc itself */ - struct tcf_proto *filter_list; + struct qdisc_watchdog 
watchdog; - int rate2quantum; /* quant = rate / rate2quantum */ - s64 now; /* cached dequeue time */ - struct qdisc_watchdog watchdog; + s64 now; /* cached dequeue time */ + struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ - /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; - int direct_qlen; /* max qlen of above */ + /* time of nearest event per level (row) */ + s64 near_ev_cache[TC_HTB_MAXDEPTH]; - long direct_pkts; + int row_mask[TC_HTB_MAXDEPTH]; -#define HTB_WARN_TOOMANYEVENTS 0x1 - unsigned int warned; /* only one warning */ - struct work_struct work; + struct htb_level hlevel[TC_HTB_MAXDEPTH]; }; /* find class in global hash table using given handle */ @@ -284,7 +290,7 @@ static void htb_add_to_id_tree(struct rb_root *root, static void htb_add_to_wait_tree(struct htb_sched *q, struct htb_class *cl, s64 delay) { - struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; + struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL; cl->pq_key = q->now + delay; if (cl->pq_key == q->now) @@ -304,7 +310,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q, p = &parent->rb_left; } rb_link_node(&cl->pq_node, parent, p); - rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]); + rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq); } /** @@ -331,7 +337,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q, while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); - htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); + htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio); } } @@ -357,16 +363,18 @@ static inline void htb_remove_class_from_row(struct htb_sched *q, struct htb_class *cl, int mask) { int m = 0; + struct htb_level *hlevel = &q->hlevel[cl->level]; while (mask) { int prio = ffz(~mask); + struct htb_prio *hprio = &hlevel->hprio[prio]; mask &= ~(1 << prio); - if (q->ptr[cl->level][prio] == cl->node + prio) - 
htb_next_rb_node(q->ptr[cl->level] + prio); + if (hprio->ptr == cl->node + prio) + htb_next_rb_node(&hprio->ptr); - htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); - if (!q->row[cl->level][prio].rb_node) + htb_safe_rb_erase(cl->node + prio, &hprio->row); + if (!hprio->row.rb_node) m |= 1 << prio; } q->row_mask[cl->level] &= ~m; @@ -390,13 +398,13 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.feed[prio].rb_node) + if (p->un.inner.clprio[prio].feed.rb_node) /* parent already has its feed in use so that * reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); + htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio); } p->prio_activity |= mask; cl = p; @@ -426,18 +434,19 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.ptr[prio] == cl->node + prio) { + if (p->un.inner.clprio[prio].ptr == cl->node + prio) { /* we are removing child which is pointed to from * parent feed - forget the pointer but remember * classid */ - p->un.inner.last_ptr_id[prio] = cl->common.classid; - p->un.inner.ptr[prio] = NULL; + p->un.inner.clprio[prio].last_ptr_id = cl->common.classid; + p->un.inner.clprio[prio].ptr = NULL; } - htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio); + htb_safe_rb_erase(cl->node + prio, + &p->un.inner.clprio[prio].feed); - if (!p->un.inner.feed[prio].rb_node) + if (!p->un.inner.clprio[prio].feed.rb_node) mask |= 1 << prio; } @@ -652,7 +661,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, htb_change_class_mode(q, cl, &diff); if (old_mode != cl->cmode) { if (old_mode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq); if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } 
@@ -672,7 +681,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq, q->now for too many events). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static s64 htb_do_events(struct htb_sched *q, int level, +static s64 htb_do_events(struct htb_sched *q, const int level, unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of @@ -680,10 +689,12 @@ static s64 htb_do_events(struct htb_sched *q, int level, * too soon */ unsigned long stop_at = start + 2; + struct rb_root *wait_pq = &q->hlevel[level].wait_pq; + while (time_before(jiffies, stop_at)) { struct htb_class *cl; s64 diff; - struct rb_node *p = rb_first(&q->wait_pq[level]); + struct rb_node *p = rb_first(wait_pq); if (!p) return 0; @@ -692,7 +703,7 @@ static s64 htb_do_events(struct htb_sched *q, int level, if (cl->pq_key > q->now) return cl->pq_key; - htb_safe_rb_erase(p, q->wait_pq + level); + htb_safe_rb_erase(p, wait_pq); diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) @@ -736,8 +747,7 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, * * Find leaf where current feed pointers points to. 
*/ -static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, - struct rb_node **pptr, u32 * pid) +static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) { int i; struct { @@ -746,10 +756,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_ON(!tree->rb_node); - sp->root = tree->rb_node; - sp->pptr = pptr; - sp->pid = pid; + BUG_ON(!hprio->row.rb_node); + sp->root = hprio->row.rb_node; + sp->pptr = &hprio->ptr; + sp->pid = &hprio->last_ptr_id; for (i = 0; i < 65535; i++) { if (!*sp->pptr && *sp->pid) { @@ -776,12 +786,15 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, } } else { struct htb_class *cl; + struct htb_prio *clp; + cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); if (!cl->level) return cl; - (++sp)->root = cl->un.inner.feed[prio].rb_node; - sp->pptr = cl->un.inner.ptr + prio; - sp->pid = cl->un.inner.last_ptr_id + prio; + clp = &cl->un.inner.clprio[prio]; + (++sp)->root = clp->feed.rb_node; + sp->pptr = &clp->ptr; + sp->pid = &clp->last_ptr_id; } } WARN_ON(1); @@ -791,15 +804,16 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, /* dequeues packet at given priority and level; call only if * you are sure that there is active class at prio/level */ -static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, - int level) +static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio, + const int level) { struct sk_buff *skb = NULL; struct htb_class *cl, *start; + struct htb_level *hlevel = &q->hlevel[level]; + struct htb_prio *hprio = &hlevel->hprio[prio]; + /* look initial class up in the row */ - start = cl = htb_lookup_leaf(q->row[level] + prio, prio, - q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + start = cl = htb_lookup_leaf(hprio, prio); do { next: @@ -819,9 +833,7 @@ next: if ((q->row_mask[level] & (1 << prio)) == 0) return NULL; - next = 
htb_lookup_leaf(q->row[level] + prio, - prio, q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + next = htb_lookup_leaf(hprio, prio); if (cl == start) /* fix start if we just deleted it */ start = next; @@ -834,11 +846,9 @@ next: break; qdisc_warn_nonwc("htb", cl->un.leaf.q); - htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> - ptr[0]) + prio); - cl = htb_lookup_leaf(q->row[level] + prio, prio, - q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr: + &q->hlevel[0].hprio[prio].ptr); + cl = htb_lookup_leaf(hprio, prio); } while (cl != start); @@ -847,8 +857,8 @@ next: cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); if (cl->un.leaf.deficit[level] < 0) { cl->un.leaf.deficit[level] += cl->quantum; - htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> - ptr[0]) + prio); + htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr : + &q->hlevel[0].hprio[prio].ptr); } /* this used to be after charge_class but this constelation * gives us slightly better performance @@ -888,15 +898,14 @@ ok: for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; - s64 event; + s64 event = q->near_ev_cache[level]; - if (q->now >= q->near_ev_cache[level]) { + if (q->now >= event) { event = htb_do_events(q, level, start_at); if (!event) event = q->now + NSEC_PER_SEC; q->near_ev_cache[level] = event; - } else - event = q->near_ev_cache[level]; + } if (next_event > event) next_event = event; @@ -976,10 +985,8 @@ static void htb_reset(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; - memset(q->row, 0, sizeof(q->row)); + memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); - memset(q->wait_pq, 0, sizeof(q->wait_pq)); - memset(q->ptr, 0, sizeof(q->ptr)); for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); } @@ -1200,7 +1207,8 @@ 
static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); if (parent->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level); + htb_safe_rb_erase(&parent->pq_node, + &q->hlevel[parent->level].wait_pq); parent->level = 0; memset(&parent->un.inner, 0, sizeof(parent->un.inner)); @@ -1289,7 +1297,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node, + &q->hlevel[cl->level].wait_pq); if (last_child) htb_parent_to_leaf(q, cl, new_q); @@ -1411,7 +1420,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { - htb_safe_rb_erase(&parent->pq_node, q->wait_pq); + htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq); parent->cmode = HTB_CAN_SEND; } parent->level = (parent->parent ? parent->parent->level -- cgit v1.2.3 From af92e5425e4a7cfbc9b85dc268acfaadb551cc56 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Sat, 15 Jun 2013 23:04:56 +0300 Subject: inet: frag , remove an empty ifdef. This patch removes an empty ifdef from inet_frag_intern() in net/ipv4/inet_fragment.c. commit b67bfe0d42cac56c512dd5da4b1b347a23f4b70a (hlist: drop the node parameter from iterators) removed hlist from net/ipv4/inet_fragment.c, but did not remove the enclosing ifdef command, which is now empty. Signed-off-by: Rami Rosen Signed-off-by: David S. 
Miller --- net/ipv4/inet_fragment.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7e06641e36a..4b864430a8c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -247,8 +247,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, { struct inet_frag_bucket *hb; struct inet_frag_queue *qp; -#ifdef CONFIG_SMP -#endif unsigned int hash; read_lock(&f->lock); /* Protects against hash rebuild */ -- cgit v1.2.3 From 130ffbc2638ddc290fcbabe1b9ce6a5d333a6a97 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 12 Jun 2013 17:54:51 +0200 Subject: netfilter: check return code from nla_parse_nested These are the only calls under net/ that do not check nla_parse_nested() for its error code, but simply continue execution. If parsing of netlink attributes fails, we should return with an error instead of continuing. In nearly all of these calls we have a policy attached, that is being type verified during nla_parse_nested(), which we would miss checking for otherwise.
Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 30 +++++++++++++++++++++++------- net/netfilter/nfnetlink_cthelper.c | 16 ++++++++++++---- net/netfilter/nfnetlink_cttimeout.c | 6 ++++-- 3 files changed, 39 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6d0f8a17c5b..f83a52298ef 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -828,7 +828,9 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) struct nf_conntrack_l3proto *l3proto; int ret = 0; - nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + if (ret < 0) + return ret; rcu_read_lock(); l3proto = __nf_ct_l3proto_find(tuple->src.l3num); @@ -895,7 +897,9 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + if (err < 0) + return err; if (!tb[CTA_TUPLE_IP]) return -EINVAL; @@ -946,9 +950,12 @@ static inline int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, struct nlattr **helpinfo) { + int err; struct nlattr *tb[CTA_HELP_MAX+1]; - nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + if (err < 0) + return err; if (!tb[CTA_HELP_NAME]) return -EINVAL; @@ -1431,7 +1438,9 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] struct nf_conntrack_l4proto *l4proto; int err = 0; - nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + if (err < 0) + return err; rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); @@ -1452,9 
+1461,12 @@ static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { static inline int change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { + int err; struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; - nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + if (err < 0) + return err; if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) return -EINVAL; @@ -2115,7 +2127,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) struct nlattr *cda[CTA_MAX+1]; int ret; - nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + if (ret < 0) + return ret; spin_lock_bh(&nf_conntrack_lock); ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); @@ -2710,7 +2724,9 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_tuple nat_tuple = {}; int err; - nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + if (err < 0) + return err; if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE]) return -EINVAL; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a191b6db657..9e287cb56a0 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -67,9 +67,12 @@ static int nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, const struct nlattr *attr) { + int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; - nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + if (err < 0) + return err; if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; @@ -121,9 +124,12 @@ static int nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, const struct nlattr *attr) { + int err; struct nlattr 
*tb[NFCTH_POLICY_MAX+1]; - nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + if (err < 0) + return err; if (!tb[NFCTH_POLICY_NAME] || !tb[NFCTH_POLICY_EXPECT_MAX] || @@ -153,8 +159,10 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; - nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set); + ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (ret < 0) + return ret; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 65074dfb938..50580494148 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -59,8 +59,10 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) { struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; - nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy); + ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, + attr, l4proto->ctnl_timeout.nla_policy); + if (ret < 0) + return ret; ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, &timeout->data); -- cgit v1.2.3 From 6547a221871f139cc56328a38105d47c14874cbe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Jun 2013 17:31:28 +0200 Subject: netfilter: nf_conntrack: avoid large timeout for mid-stream pickup When loose tracking is enabled (default), non-syn packets cause creation of new conntracks in established state with default timeout for established state (5 days). This causes the table to fill up with UNREPLIED when the 'new ack' packet happened to be the last-ack of a previous, already timed-out connection. 
Consider: A 192.168.x.52792 > 10.184.y.80: F, 426:426(0) ack 9237 win 255 B 10.184.y.80 > 192.168.x.52792: ., ack 427 win 123 <61 second pause> C 10.184.y.80 > 192.168.x.52792: F, 9237:9237(0) ack 427 win 123 D 192.168.x.52792 > 10.184.y.80: ., ack 9238 win 255 B moves conntrack to CLOSE_WAIT and will kill it after 60 second timeout, C is ignored (FIN set), but last packet (D) causes new ct with 5-days timeout. Use UNACK timeout (5 minutes) instead to get rid of these entries sooner when in ESTABLISHED state without having seen traffic in both directions. Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4d4d8f1d01f..7dcc376eea5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1043,6 +1043,12 @@ static int tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection + * pickup with loose=1. Avoid large ESTABLISHED timeout. + */ + if (new_state == TCP_CONNTRACK_ESTABLISHED && + timeout > timeouts[TCP_CONNTRACK_UNACK]) + timeout = timeouts[TCP_CONNTRACK_UNACK]; } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) -- cgit v1.2.3 From 681f130f39e10087475383e6771b9366e26bab0c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jun 2013 05:52:22 -0700 Subject: netfilter: xt_socket: add XT_SOCKET_NOWILDCARD flag xt_socket module can be a nice replacement to conntrack module in some cases (SYN filtering for example) But it lacks the ability to match the 3rd packet of TCP handshake (ACK coming from the client). Add a XT_SOCKET_NOWILDCARD flag to disable the wildcard mechanism. 
The wildcard is the legacy socket match behavior, that ignores LISTEN sockets bound to INADDR_ANY (or ipv6 equivalent) iptables -I INPUT -p tcp --syn -j SYN_CHAIN iptables -I INPUT -m socket --nowildcard -j ACCEPT Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 70 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 02704245710..f8b71911037 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -163,8 +163,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY, + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, @@ -197,7 +200,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) } static bool -socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } @@ -259,7 +262,7 @@ extract_icmp6_fields(const struct sk_buff *skb, } static bool -socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -302,8 +305,11 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY + * unless 
XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); /* Ignore non-transparent sockets, @@ -331,6 +337,28 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) } #endif +static int socket_mt_v1_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V1) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); + return -EINVAL; + } + return 0; +} + +static int socket_mt_v2_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V2) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); + return -EINVAL; + } + return 0; +} + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", @@ -345,7 +373,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt4_v1, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), @@ -356,7 +385,32 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, - .match = socket_mt6_v1, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v1_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#endif + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV4, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v2_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#ifdef 
XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), -- cgit v1.2.3 From c5623556fc61804713b1569b4f748e36956bc6e8 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:33 +0300 Subject: Bluetooth: Handle LE L2CAP signalling in its own function The LE L2CAP signalling channel follows its own rules and will continue to evolve independently from the BR/EDR signalling channel. Therefore, it makes sense to have a clear split from BR/EDR by having a dedicated function for handling LE signalling commands. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 53 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4be6a264b47..ab996111818 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5292,6 +5292,51 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, } } +static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, + struct sk_buff *skb) +{ + u8 *data = skb->data; + int len = skb->len; + struct l2cap_cmd_hdr cmd; + int err; + + l2cap_raw_recv(conn, skb); + + while (len >= L2CAP_CMD_HDR_SIZE) { + u16 cmd_len; + memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); + data += L2CAP_CMD_HDR_SIZE; + len -= L2CAP_CMD_HDR_SIZE; + + cmd_len = le16_to_cpu(cmd.len); + + BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, + cmd.ident); + + if (cmd_len > len || !cmd.ident) { + BT_DBG("corrupted command"); + break; + } + + err = l2cap_le_sig_cmd(conn, &cmd, data); + if (err) { + struct l2cap_cmd_rej_unk rej; + + BT_ERR("Wrong link type (%d)", err); + + /* FIXME: Map err to a valid reason */ + rej.reason = 
__constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, + sizeof(rej), &rej); + } + + data += cmd_len; + len -= cmd_len; + } + + kfree_skb(skb); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -5318,11 +5363,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, break; } - if (conn->hcon->type == LE_LINK) - err = l2cap_le_sig_cmd(conn, &cmd, data); - else - err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); - + err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); if (err) { struct l2cap_cmd_rej_unk rej; @@ -6395,6 +6436,8 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) switch (cid) { case L2CAP_CID_LE_SIGNALING: + l2cap_le_sig_channel(conn, skb); + break; case L2CAP_CID_SIGNALING: l2cap_sig_channel(conn, skb); break; -- cgit v1.2.3 From 073d1cf35fe45d89f5a553c21eea18b504dd6937 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:35 +0300 Subject: Bluetooth: Rename L2CAP_CID_LE_DATA to L2CAP_CID_ATT In future Core Specification versions the ATT CID will be just one of many possible CIDs that can be used for data transfer. Therefore, it makes sense to rename the define for the ATT CID to something less ambiguous.
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 14 +++++++------- net/bluetooth/l2cap_sock.c | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ab996111818..c87f8f94083 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -504,8 +504,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) if (conn->hcon->type == LE_LINK) { /* LE connection */ chan->omtu = L2CAP_DEFAULT_MTU; - chan->scid = L2CAP_CID_LE_DATA; - chan->dcid = L2CAP_CID_LE_DATA; + chan->scid = L2CAP_CID_ATT; + chan->dcid = L2CAP_CID_ATT; } else { /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); @@ -1344,7 +1344,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) BT_DBG(""); /* Check if we have socket listening on cid */ - pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA, + pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_ATT, conn->src, conn->dst); if (!pchan) return; @@ -1792,7 +1792,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, auth_type = l2cap_get_auth_type(chan); - if (chan->dcid == L2CAP_CID_LE_DATA) + if (chan->dcid == L2CAP_CID_ATT) hcon = hci_connect(hdev, LE_LINK, dst, dst_type, chan->sec_level, auth_type); else @@ -6397,7 +6397,7 @@ static void l2cap_att_channel(struct l2cap_conn *conn, { struct l2cap_chan *chan; - chan = l2cap_global_chan_by_scid(0, L2CAP_CID_LE_DATA, + chan = l2cap_global_chan_by_scid(0, L2CAP_CID_ATT, conn->src, conn->dst); if (!chan) goto drop; @@ -6448,7 +6448,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) l2cap_conless_channel(conn, psm, skb); break; - case L2CAP_CID_LE_DATA: + case L2CAP_CID_ATT: l2cap_att_channel(conn, skb); break; @@ -6574,7 +6574,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) continue; } 
- if (chan->scid == L2CAP_CID_LE_DATA) { + if (chan->scid == L2CAP_CID_ATT) { if (!status && encrypt) { chan->sec_level = hcon->sec_level; l2cap_chan_ready(chan); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 36fed40c162..0098af80b21 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -466,7 +466,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) { switch (chan->scid) { - case L2CAP_CID_LE_DATA: + case L2CAP_CID_ATT: if (mtu < L2CAP_LE_MIN_MTU) return false; break; @@ -630,7 +630,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, conn = chan->conn; /*change security for LE channels */ - if (chan->scid == L2CAP_CID_LE_DATA) { + if (chan->scid == L2CAP_CID_ATT) { if (!conn->hcon->out) { err = -EINVAL; break; -- cgit v1.2.3 From f224ca5fc207a9957164e6f42ec6766da0f55d54 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:36 +0300 Subject: Bluetooth: Fix LE vs BR/EDR selection when connecting The choice between LE and BR/EDR should be made on the destination address type instead of the destination CID. This is particularly important when in the future more than one CID will be allowed for LE. 
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c87f8f94083..29398293d50 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1792,7 +1792,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, auth_type = l2cap_get_auth_type(chan); - if (chan->dcid == L2CAP_CID_ATT) + if (bdaddr_type_is_le(dst_type)) hcon = hci_connect(hdev, LE_LINK, dst, dst_type, chan->sec_level, auth_type); else -- cgit v1.2.3 From 141d57065afd11977c4d346f64b25350445bf689 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:37 +0300 Subject: Bluetooth: Fix EBUSY condition test in l2cap_chan_connect The current test in l2cap_chan_connect is intended to protect against multiple conflicting connect attempts. However, it assumes that there will only ever be a single CID that is connected to, which is not true. We do need to check for conflicts with connect attempts to the same destination CID, but this check is not in any way specific to LE and can be applied to BR/EDR as well.
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 29398293d50..640423b4f41 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1811,16 +1811,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, goto done; } - if (hcon->type == LE_LINK) { - err = 0; - - if (!list_empty(&conn->chan_l)) { - err = -EBUSY; - hci_conn_drop(hcon); - } - - if (err) - goto done; + if (cid && __l2cap_get_chan_by_dcid(conn, cid)) { + hci_conn_drop(hcon); + err = -EBUSY; + goto done; } /* Update source addr of the socket */ -- cgit v1.2.3 From 9f22398ce4baf816535415e65949d03f55a7973a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:38 +0300 Subject: Bluetooth: Fix hardcoding ATT CID in __l2cap_chan_add() Since in the future more than the ATT CID may be permissible we should not be hardcoding it for all LE connections in __l2cap_chan_add(). Instead, the source ATT CID should only be set if the destination is also ATT, and in other cases we should just use the existing dynamic CID allocation function. Assigning scid based on dcid means that chan->dcid must be properly initialized whenever __l2cap_chan_add() is called. l2cap_le_conn_ready() wasn't initializing it properly so this is also taken care of in this patch.
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 640423b4f41..4803610187e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -504,8 +504,10 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) if (conn->hcon->type == LE_LINK) { /* LE connection */ chan->omtu = L2CAP_DEFAULT_MTU; - chan->scid = L2CAP_CID_ATT; - chan->dcid = L2CAP_CID_ATT; + if (chan->dcid == L2CAP_CID_ATT) + chan->scid = L2CAP_CID_ATT; + else + chan->scid = l2cap_alloc_cid(conn); } else { /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); @@ -1357,6 +1359,8 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) if (!chan) goto clean; + chan->dcid = L2CAP_CID_ATT; + sk = chan->sk; hci_conn_hold(conn->hcon); -- cgit v1.2.3 From d8729922b474eab65ca738028a2e69fb12e2eaa6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:39 +0300 Subject: Bluetooth: Add clarifying comment to l2cap_conn_ready() There is an extra call to smp_conn_security() for outgoing LE connections from l2cap_conn_ready() but the reason for this call is far from clear. After a bit of commit history research and using git blame I found out that this extra call is for socket-less pairing processes added by commit 160dc6ac1. This patch adds a clarifying comment to the code for this. 
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4803610187e..417d17723ee 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1387,6 +1387,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) if (!hcon->out && hcon->type == LE_LINK) l2cap_le_conn_ready(conn); + /* For outgoing pairing which doesn't necessarily have an + * associated socket (e.g. mgmt_pair_device). + */ if (hcon->out && hcon->type == LE_LINK) smp_conn_security(hcon, hcon->pending_sec_level); -- cgit v1.2.3 From 97f57c0b14ad2ef0628fc6db48cd6c08e0e52c50 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:40 +0300 Subject: Bluetooth: Fix duplicate call to l2cap_chan_ready() In l2cap_le_conn_ready() after doing l2cap_chan_add() the LE channel is part of the list which is subsequently iterated in l2cap_conn_ready(). In this loop each channel will get l2cap_chan_ready() called, which would result in trying to set the channel into BT_CONNECTED state twice. Instead it makes sense to just add the channel but not call chan_ready in l2cap_le_conn_ready, which is what this patch does.
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 417d17723ee..843463ecd7b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1371,8 +1371,6 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) l2cap_chan_add(conn, chan); - l2cap_chan_ready(chan); - clean: release_sock(parent); } -- cgit v1.2.3 From 60bac184c9c7df2299aca4dc45c4b5b486f49a89 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:41 +0300 Subject: Bluetooth: Remove useless sk variable in l2cap_le_conn_ready The sk variable is of quite little use since it's only used to simplify access in the two bt_sk() calls. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 843463ecd7b..1557c3c774f 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1340,7 +1340,7 @@ static struct l2cap_chan *l2cap_global_chan_by_scid(int state, u16 cid, static void l2cap_le_conn_ready(struct l2cap_conn *conn) { - struct sock *parent, *sk; + struct sock *parent; struct l2cap_chan *chan, *pchan; BT_DBG(""); @@ -1361,13 +1361,11 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) chan->dcid = L2CAP_CID_ATT; - sk = chan->sk; - hci_conn_hold(conn->hcon); conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - bacpy(&bt_sk(sk)->src, conn->src); - bacpy(&bt_sk(sk)->dst, conn->dst); + bacpy(&bt_sk(chan->sk)->src, conn->src); + bacpy(&bt_sk(chan->sk)->dst, conn->dst); l2cap_chan_add(conn, chan); -- cgit v1.2.3 From af1c01349ecc2b8ab2c329e4dbd46e9018469bd1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:42 +0300 Subject: 
Bluetooth: Remove unnecessary L2CAP channel state check In l2cap_att_channel() we're only interested in the BT_CONNECTED state so this state can directly be passed to l2cap_global_chan_by_scid(). This way there's no need to do any additional state check later. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1557c3c774f..55c6836796f 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6394,16 +6394,13 @@ static void l2cap_att_channel(struct l2cap_conn *conn, { struct l2cap_chan *chan; - chan = l2cap_global_chan_by_scid(0, L2CAP_CID_ATT, + chan = l2cap_global_chan_by_scid(BT_CONNECTED, L2CAP_CID_ATT, conn->src, conn->dst); if (!chan) goto drop; BT_DBG("chan %p, len %d", chan, skb->len); - if (chan->state != BT_BOUND && chan->state != BT_CONNECTED) - goto drop; - if (chan->imtu < skb->len) goto drop; -- cgit v1.2.3 From 5ee9891dd8a63df1bf2ccd437872ad30a5850449 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:43 +0300 Subject: Bluetooth: Simplify hci_conn_hold/drop logic for L2CAP The L2CAP code has been incrementing the hci_conn reference for each l2cap_chan instance in the l2cap_conn list. Likewise, the reference is dropped each time an l2cap_chan is removed from the list. The reference counting policy with respect to removal has been clear and explicit in the l2cap_chan_del function, however for addition the function calling l2cap_chan_add has always had to do a separate hci_conn_hold call. What made the counting even harder to follow is that the hci_connect() procedure increments the reference and the L2CAP layer making this call took advantage of it to use it as its own reference.
This patch aims to clarify things by having the call to hci_conn_hold inside __l2cap_chan_add, thereby removing the need to do it in the functions calling __l2cap_chan_add. The reference count for hci_connect is still kept as it's necessary for users such as mgmt_pair_device, however for the L2CAP layer it means that an extra call to hci_conn_drop must be performed once l2cap_chan_add has been done. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 55c6836796f..d7b501ba271 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -545,6 +545,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) l2cap_chan_hold(chan); + hci_conn_hold(conn->hcon); + list_add(&chan->list, &conn->chan_l); } @@ -1361,7 +1363,6 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) chan->dcid = L2CAP_CID_ATT; - hci_conn_hold(conn->hcon); conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; bacpy(&bt_sk(chan->sk)->src, conn->src); @@ -1827,6 +1828,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, l2cap_chan_add(conn, chan); l2cap_chan_lock(chan); + /* l2cap_chan_add takes its own ref so we can drop this one */ + hci_conn_drop(hcon); + l2cap_state_change(chan, BT_CONNECT); __set_chan_timer(chan, sk->sk_sndtimeo); @@ -3748,8 +3752,6 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, sk = chan->sk; - hci_conn_hold(conn->hcon); - bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); chan->psm = psm; -- cgit v1.2.3 From 0cc59a72c723979cf8973aff4df874a5f7a697c7 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:44 +0300 Subject: Bluetooth: Remove useless hci_conn disc_timeout setting There's no need to reset disc_timeout in l2cap_le_conn_ready since 
HCI_DISCONN_TIMEOUT is the default when the hci_conn is created and there should be no way for it to get changed between creation and l2cap_le_conn_ready being called. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d7b501ba271..8ae6a21cf64 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1363,8 +1363,6 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) chan->dcid = L2CAP_CID_ATT; - conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - bacpy(&bt_sk(chan->sk)->src, conn->src); bacpy(&bt_sk(chan->sk)->dst, conn->dst); -- cgit v1.2.3 From 44f3b0fbaa9bfa7a88577ee8c446d0a78cb1d73a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 29 Apr 2013 19:35:45 +0300 Subject: Bluetooth: Fix multiple LE socket handling The LE ATT server socket needs to be superseded by any ATT client sockets. Previously this was done by looking at the hcon->out variable (indicating whether the connection is outgoing or incoming) which is a too crude way of determining whether the server socket needs to be picked or not (an outgoing connection doesn't necessarily mean that an ATT client socket has triggered it). This patch extends the ATT server socket lookup function (l2cap_le_conn_ready) to be used for all LE connections (regardless of the hcon->out value) and adds an internal check into the function for the existence of any ATT client sockets (in which case the server socket should be skipped). For this to work reliably all lookups must be done while the l2cap_conn->chan_lock is held, meaning also that the call to l2cap_chan_add needs to be changed to its lockless __l2cap_chan_add counterpart. 
Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8ae6a21cf64..9af3a76844f 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1353,6 +1353,10 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) if (!pchan) return; + /* Client ATT sockets should override the server one */ + if (__l2cap_get_chan_by_dcid(conn, L2CAP_CID_ATT)) + return; + parent = pchan->sk; lock_sock(parent); @@ -1366,7 +1370,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) bacpy(&bt_sk(chan->sk)->src, conn->src); bacpy(&bt_sk(chan->sk)->dst, conn->dst); - l2cap_chan_add(conn, chan); + __l2cap_chan_add(conn, chan); clean: release_sock(parent); @@ -1379,9 +1383,6 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) BT_DBG("conn %p", conn); - if (!hcon->out && hcon->type == LE_LINK) - l2cap_le_conn_ready(conn); - /* For outgoing pairing which doesn't necessarily have an * associated socket (e.g. mgmt_pair_device). */ @@ -1390,6 +1391,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) mutex_lock(&conn->chan_lock); + if (hcon->type == LE_LINK) + l2cap_le_conn_ready(conn); + list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); -- cgit v1.2.3 From 1f9b9a5dc5bb8ee360db9d32b2090aac497ae82a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:27 -0300 Subject: Bluetooth: Make inquiry_cache_flush non-static In order to use HCI request framework in start_discovery, we'll need to call inquiry_cache_flush in mgmt.c. Therefore, this patch adds the hci_ prefix to inquiry_cache_flush and makes it non-static. 
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ace5e55fe5a..43c63877c5b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -751,7 +751,7 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) hdev->discovery.state = state; } -static void inquiry_cache_flush(struct hci_dev *hdev) +void hci_inquiry_cache_flush(struct hci_dev *hdev) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *p, *n; @@ -964,7 +964,7 @@ int hci_inquiry(void __user *arg) hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); do_inquiry = 1; } hci_dev_unlock(hdev); @@ -1230,7 +1230,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work_sync(&hdev->le_scan_disable); hci_dev_lock(hdev); - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); @@ -1331,7 +1331,7 @@ int hci_dev_reset(__u16 dev) skb_queue_purge(&hdev->cmd_q); hci_dev_lock(hdev); - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); @@ -3562,7 +3562,7 @@ int hci_do_inquiry(struct hci_dev *hdev, u8 length) if (test_bit(HCI_INQUIRY, &hdev->flags)) return -EINPROGRESS; - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); memset(&cp, 0, sizeof(cp)); memcpy(&cp.lap, lap, sizeof(cp.lap)); -- cgit v1.2.3 From 7c3077207c705d0aa200ce22d49a0376d194dfca Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:28 -0300 Subject: Bluetooth: Update start_discovery to use HCI request This patch modifies the start_discovery function so it uses the HCI request framework. 
We build the HCI request according to the discovery type (add inquiry or LE scan HCI commands) and run the HCI request. We also register the start_discovery_complete callback which handles mgmt command complete events for this command. This way, we move all start_discovery mgmt handling code spread in hci_event.c to a single place in mgmt.c. This patch also merges the LE-only and interleaved discovery type cases since these cases are pretty much the same now. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 85 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f8ecbc70293..434df715448 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2650,11 +2650,51 @@ int mgmt_interleaved_discovery(struct hci_dev *hdev) return err; } +static void start_discovery_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status %d", status); + + if (status) { + hci_dev_lock(hdev); + mgmt_start_discovery_failed(hdev, status); + hci_dev_unlock(hdev); + return; + } + + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + hci_dev_unlock(hdev); + + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + LE_SCAN_TIMEOUT_LE_ONLY); + break; + + case DISCOV_TYPE_INTERLEAVED: + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + LE_SCAN_TIMEOUT_BREDR_LE); + break; + + case DISCOV_TYPE_BREDR: + break; + + default: + BT_ERR("Invalid discovery type %d", hdev->discovery.type); + } +} + static int start_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; struct pending_cmd *cmd; + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + struct hci_cp_inquiry inq_cp; + struct hci_request req; + /* General
inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; int err; BT_DBG("%s", hdev->name); @@ -2687,6 +2727,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; + hci_req_init(&req, hdev); + switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: if (!lmp_bredr_capable(hdev)) { @@ -2696,10 +2738,23 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_BUSY); + mgmt_pending_remove(cmd); + goto failed; + } + + hci_inquiry_cache_flush(hdev); + + memset(&inq_cp, 0, sizeof(inq_cp)); + memcpy(&inq_cp.lap, lap, sizeof(inq_cp.lap)); + inq_cp.length = INQUIRY_LEN_BREDR; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(inq_cp), &inq_cp); break; case DISCOV_TYPE_LE: + case DISCOV_TYPE_INTERLEAVED: if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, MGMT_STATUS_NOT_SUPPORTED); @@ -2707,20 +2762,40 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); - break; - - case DISCOV_TYPE_INTERLEAVED: - if (!lmp_host_le_capable(hdev) || !lmp_bredr_capable(hdev)) { + if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && + !lmp_bredr_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, MGMT_STATUS_NOT_SUPPORTED); mgmt_pending_remove(cmd); goto failed; } - err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE); + if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_REJECTED); + mgmt_pending_remove(cmd); + goto failed; + } + + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + 
MGMT_STATUS_BUSY); + mgmt_pending_remove(cmd); + goto failed; + } + + memset(&param_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_ACTIVE; + param_cp.interval = cpu_to_le16(LE_SCAN_INT); + param_cp.window = cpu_to_le16(LE_SCAN_WIN); + hci_req_add(&req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + &param_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); break; default: @@ -2730,6 +2805,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } + err = hci_req_run(&req, start_discovery_complete); if (err < 0) mgmt_pending_remove(cmd); else -- cgit v1.2.3 From fef5234a791507a2fe1ccfc85f080523fe762320 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:29 -0300 Subject: Bluetooth: Remove start discovery handling from hci_event.c Since all mgmt start discovery command complete events are now handled in start_discovery_complete callback in mgmt.c, we can remove this handling from hci_event.c.
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b93cd2eb5d5..0e71e6c4739 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -943,12 +943,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (status) { - hci_dev_lock(hdev); - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); - return; - } } static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, @@ -965,18 +959,10 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, switch (cp->enable) { case LE_SCAN_ENABLE: - if (status) { - hci_dev_lock(hdev); - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); + if (status) return; - } set_bit(HCI_LE_SCAN, &hdev->dev_flags); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - hci_dev_unlock(hdev); break; case LE_SCAN_DISABLE: @@ -1077,18 +1063,10 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) if (status) { hci_conn_check_pending(hdev); - hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); return; } set_bit(HCI_INQUIRY, &hdev->flags); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - hci_dev_unlock(hdev); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) -- cgit v1.2.3 From 41dc2bd6d13bfccc34d05586be2eb65876a5990a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:30 -0300 Subject: Bluetooth: Make mgmt_start_discovery_failed static mgmt_start_discovery_failed is now only used in mgmt.c so we can make it a local function. 
This patch also moves the mgmt_start_discovery_failed definition up in mgmt.c to avoid forward declaration. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 434df715448..a9bd271a736 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2650,6 +2650,27 @@ int mgmt_interleaved_discovery(struct hci_dev *hdev) return err; } +static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + u8 type; + int err; + + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + + cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); + if (!cmd) + return -ENOENT; + + type = hdev->discovery.type; + + err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), + &type, sizeof(type)); + mgmt_pending_remove(cmd); + + return err; +} + static void start_discovery_complete(struct hci_dev *hdev, u8 status) { BT_DBG("status %d", status); @@ -4190,27 +4211,6 @@ int mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, sizeof(*ev) + eir_len, NULL); } -int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) -{ - struct pending_cmd *cmd; - u8 type; - int err; - - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - - cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); - if (!cmd) - return -ENOENT; - - type = hdev->discovery.type; - - err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), - &type, sizeof(type)); - mgmt_pending_remove(cmd); - - return err; -} - int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) { struct pending_cmd *cmd; -- cgit v1.2.3 From 0d8cc935e01c0fd1312a10881f4c0f1c4b4d05ab Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:31 -0300 Subject: Bluetooth: Move discovery macros to hci_core.h Some of discovery macros will be
used in hci_core so we need to define them in common place such as hci_core.h. Thus, this patch moves discovery macros to hci_core.h and also adds the DISCOV_ prefix to them. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a9bd271a736..6b31e93af76 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -102,18 +102,6 @@ static const u16 mgmt_events[] = { MGMT_EV_PASSKEY_NOTIFY, }; -/* - * These LE scan and inquiry parameters were chosen according to LE General - * Discovery Procedure specification. - */ -#define LE_SCAN_WIN 0x12 -#define LE_SCAN_INT 0x12 -#define LE_SCAN_TIMEOUT_LE_ONLY msecs_to_jiffies(10240) -#define LE_SCAN_TIMEOUT_BREDR_LE msecs_to_jiffies(5120) - -#define INQUIRY_LEN_BREDR 0x08 /* TGAP(100) */ -#define INQUIRY_LEN_BREDR_LE 0x04 /* TGAP(100)/2 */ - #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) #define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \ @@ -2641,7 +2629,7 @@ int mgmt_interleaved_discovery(struct hci_dev *hdev) hci_dev_lock(hdev); - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR_LE); + err = hci_do_inquiry(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN); if (err < 0) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); @@ -2689,12 +2677,12 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status) switch (hdev->discovery.type) { case DISCOV_TYPE_LE: queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - LE_SCAN_TIMEOUT_LE_ONLY); + DISCOV_LE_TIMEOUT); break; case DISCOV_TYPE_INTERLEAVED: queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - LE_SCAN_TIMEOUT_BREDR_LE); + DISCOV_INTERLEAVED_TIMEOUT); break; case DISCOV_TYPE_BREDR: @@ -2770,7 +2758,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, memset(&inq_cp, 0, sizeof(inq_cp)); memcpy(&inq_cp.lap, lap, 
sizeof(inq_cp.lap)); - inq_cp.length = INQUIRY_LEN_BREDR; + inq_cp.length = DISCOV_BREDR_INQUIRY_LEN; hci_req_add(&req, HCI_OP_INQUIRY, sizeof(inq_cp), &inq_cp); break; @@ -2807,8 +2795,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, memset(&param_cp, 0, sizeof(param_cp)); param_cp.type = LE_SCAN_ACTIVE; - param_cp.interval = cpu_to_le16(LE_SCAN_INT); - param_cp.window = cpu_to_le16(LE_SCAN_WIN); + param_cp.interval = cpu_to_le16(DISCOV_LE_SCAN_INT); + param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); hci_req_add(&req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), &param_cp); -- cgit v1.2.3 From 4c87eaab01df271c81f6a68e3c28dbd44d348004 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:32 -0300 Subject: Bluetooth: Use HCI request in interleaved discovery In order to have a better HCI error handling in interleaved discovery functionality, we should use the HCI request framework. This patch updates le_scan_disable_work function so it uses the HCI request framework instead of the hci_send_cmd helper. A complete callback is registered (le_scan_disable_work_complete function) so we are able to trigger the inquiry procedure (if we are running the interleaved discovery) or to stop the discovery procedure (if we are running LE-only discovery). This patch also removes the extra logic in hci_cc_le_set_scan_enable to trigger the inquiry procedure and the mgmt_interleaved_discovery function since they become useless.
Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++- net/bluetooth/hci_event.c | 10 -------- net/bluetooth/mgmt.c | 17 ------------- 3 files changed, 64 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 43c63877c5b..9270d7ee489 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2067,17 +2067,80 @@ int hci_cancel_le_scan(struct hci_dev *hdev) return 0; } +static void inquiry_complete(struct hci_dev *hdev, u8 status) +{ + if (status) { + BT_ERR("Failed to start inquiry: status %d", status); + + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + return; + } +} + +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +{ + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + struct hci_request req; + struct hci_cp_inquiry cp; + int err; + + if (status) { + BT_ERR("Failed to disable LE scanning: status %d", status); + return; + } + + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + break; + + case DISCOV_TYPE_INTERLEAVED: + hci_req_init(&req, hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + hci_dev_lock(hdev); + + hci_inquiry_cache_flush(hdev); + + err = hci_req_run(&req, inquiry_complete); + if (err) { + BT_ERR("Inquiry request failed: err %d", err); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + } + + hci_dev_unlock(hdev); + break; + } +} + static void le_scan_disable_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan_disable.work); struct hci_cp_le_set_scan_enable cp; + struct hci_request req; + int err; 
BT_DBG("%s", hdev->name); + hci_req_init(&req, hdev); + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_DISABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + err = hci_req_run(&req, le_scan_disable_work_complete); + if (err) + BT_ERR("Disable LE scanning request failed: err %d", err); } static void le_scan_work(struct work_struct *work) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0e71e6c4739..faaf1f31345 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -974,16 +974,6 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, } clear_bit(HCI_LE_SCAN, &hdev->dev_flags); - - if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && - hdev->discovery.state == DISCOVERY_FINDING) { - mgmt_interleaved_discovery(hdev); - } else { - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - } - break; default: diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6b31e93af76..743100f3ab9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2621,23 +2621,6 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, return err; } -int mgmt_interleaved_discovery(struct hci_dev *hdev) -{ - int err; - - BT_DBG("%s", hdev->name); - - hci_dev_lock(hdev); - - err = hci_do_inquiry(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN); - if (err < 0) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - - hci_dev_unlock(hdev); - - return err; -} - static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) { struct pending_cmd *cmd; -- cgit v1.2.3 From 0e05bba6f6f8c2dca7a13fe0566742277e92df07 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:33 -0300 Subject: Bluetooth: Update stop_discovery to use HCI request This patch modifies the stop_discovery function so it uses the HCI request framework. 
The HCI request is built according to the current discovery state (inquiry, LE scanning or name resolving) and a complete callback is registered to handle the command complete event for the stop discovery command. This way, we move all stop_discovery mgmt handling code spread in hci_event.c to a single place in mgmt.c. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 743100f3ab9..c33bc4f4d00 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2808,6 +2808,23 @@ failed: return err; } +static void stop_discovery_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status %d", status); + + hci_dev_lock(hdev); + + if (status) { + mgmt_stop_discovery_failed(hdev, status); + goto unlock; + } + + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + +unlock: + hci_dev_unlock(hdev); +} + static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -2815,6 +2832,8 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, struct pending_cmd *cmd; struct hci_cp_remote_name_req_cancel cp; struct inquiry_entry *e; + struct hci_request req; + struct hci_cp_le_set_scan_enable enable_cp; int err; BT_DBG("%s", hdev->name); @@ -2841,12 +2860,20 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } + hci_req_init(&req, hdev); + switch (hdev->discovery.state) { case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) - err = hci_cancel_inquiry(hdev); - else - err = hci_cancel_le_scan(hdev); + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + hci_req_add(&req, HCI_OP_INQUIRY_CANCEL, 0, NULL); + } else { + cancel_delayed_work(&hdev->le_scan_disable); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_DISABLE; +
hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, + sizeof(enable_cp), &enable_cp); + } break; @@ -2864,16 +2891,22 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, } bacpy(&cp.bdaddr, &e->data.bdaddr); - err = hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ_CANCEL, - sizeof(cp), &cp); + hci_req_add(&req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), + &cp); break; default: BT_DBG("unknown discovery state %u", hdev->discovery.state); - err = -EFAULT; + + mgmt_pending_remove(cmd); + err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, + MGMT_STATUS_FAILED, &mgmt_cp->type, + sizeof(mgmt_cp->type)); + goto unlock; } + err = hci_req_run(&req, stop_discovery_complete); if (err < 0) mgmt_pending_remove(cmd); else -- cgit v1.2.3 From 82f4785ca7b8d04ca6d0aaa37f1185c779744bc4 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:34 -0300 Subject: Bluetooth: Remove stop discovery handling from hci_event.c Since all mgmt stop discovery command complete events are now handled in stop_discovery_complete callback in mgmt.c, we can remove this handling from hci_event.c. 
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index faaf1f31345..27f66dc88c9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -40,21 +40,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (status) { - hci_dev_lock(hdev); - mgmt_stop_discovery_failed(hdev, status); - hci_dev_unlock(hdev); + if (status) return; - } clear_bit(HCI_INQUIRY, &hdev->flags); smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ wake_up_bit(&hdev->flags, HCI_INQUIRY); - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - hci_conn_check_pending(hdev); } @@ -966,12 +958,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, break; case LE_SCAN_DISABLE: - if (status) { - hci_dev_lock(hdev); - mgmt_stop_discovery_failed(hdev, status); - hci_dev_unlock(hdev); + if (status) return; - } clear_bit(HCI_LE_SCAN, &hdev->dev_flags); break; -- cgit v1.2.3 From 1183fdcad42495073045a2d9755e0a6ac2fa874e Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:35 -0300 Subject: Bluetooth: Make mgmt_stop_discovery_failed static mgmt_stop_discovery_failed is now only used in mgmt.c so we can make it a local function. This patch also moves the mgmt_stop_discovery_failed definition up in mgmt.c to avoid forward declaration.
Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c33bc4f4d00..69d17205745 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2808,6 +2808,22 @@ failed: return err; } +static int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + int err; + + cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); + if (!cmd) + return -ENOENT; + + err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), + &hdev->discovery.type, sizeof(hdev->discovery.type)); + mgmt_pending_remove(cmd); + + return err; +} + static void stop_discovery_complete(struct hci_dev *hdev, u8 status) { BT_DBG("status %d", status); @@ -4215,22 +4231,6 @@ int mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, sizeof(*ev) + eir_len, NULL); } -int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) -{ - struct pending_cmd *cmd; - int err; - - cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); - if (!cmd) - return -ENOENT; - - err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), - &hdev->discovery.type, sizeof(hdev->discovery.type)); - mgmt_pending_remove(cmd); - - return err; -} - int mgmt_discovering(struct hci_dev *hdev, u8 discovering) { struct mgmt_ev_discovering ev; -- cgit v1.2.3 From 3fd319b830247a3fe5f489e622ab404b618e0906 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:36 -0300 Subject: Bluetooth: Refactor hci_cc_le_set_scan_enable This patch does a trivial refactoring in hci_cc_le_set_scan_enable. Since start and stop discovery command failures are now handled in mgmt layer, the status check became empty. So, we can move it to outside the switch statement. 
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 27f66dc88c9..76ff1af0569 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -949,18 +949,15 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, if (!cp) return; + if (status) + return; + switch (cp->enable) { case LE_SCAN_ENABLE: - if (status) - return; - set_bit(HCI_LE_SCAN, &hdev->dev_flags); break; case LE_SCAN_DISABLE: - if (status) - return; - clear_bit(HCI_LE_SCAN, &hdev->dev_flags); break; -- cgit v1.2.3 From 917eedc56c65ba96a3bab4c346d948e73dd872f1 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:37 -0300 Subject: Bluetooth: Remove LE scan helpers This patch removes the LE scan helpers hci_le_scan and hci_cancel_le_scan and all code related to it. We now use the HCI request framework in device discovery functionality and these helpers are no longer needed.
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 113 ----------------------------------------------- 1 file changed, 113 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9270d7ee489..100539fcbfe 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1201,8 +1201,6 @@ static int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); - cancel_work_sync(&hdev->le_scan); - cancel_delayed_work(&hdev->power_off); hci_req_cancel(hdev, ENODEV); @@ -1991,82 +1989,6 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return mgmt_device_unblocked(hdev, bdaddr, type); } -static void le_scan_param_req(struct hci_request *req, unsigned long opt) -{ - struct le_scan_params *param = (struct le_scan_params *) opt; - struct hci_cp_le_set_scan_param cp; - - memset(&cp, 0, sizeof(cp)); - cp.type = param->type; - cp.interval = cpu_to_le16(param->interval); - cp.window = cpu_to_le16(param->window); - - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); -} - -static void le_scan_enable_req(struct hci_request *req, unsigned long opt) -{ - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); -} - -static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, - u16 window, int timeout) -{ - long timeo = msecs_to_jiffies(3000); - struct le_scan_params param; - int err; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return -EINPROGRESS; - - param.type = type; - param.interval = interval; - param.window = window; - - hci_req_lock(hdev); - - err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) &param, - timeo); - if (!err) - err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo); - - hci_req_unlock(hdev);
- - if (err < 0) - return err; - - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - timeout); - - return 0; -} - -int hci_cancel_le_scan(struct hci_dev *hdev) -{ - BT_DBG("%s", hdev->name); - - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return -EALREADY; - - if (cancel_delayed_work(&hdev->le_scan_disable)) { - struct hci_cp_le_set_scan_enable cp; - - /* Send HCI command to disable LE Scan */ - memset(&cp, 0, sizeof(cp)); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - } - - return 0; -} - static void inquiry_complete(struct hci_dev *hdev, u8 status) { if (status) { @@ -2143,40 +2065,6 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } -static void le_scan_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan); - struct le_scan_params *param = &hdev->le_scan_params; - - BT_DBG("%s", hdev->name); - - hci_do_le_scan(hdev, param->type, param->interval, param->window, - param->timeout); -} - -int hci_le_scan(struct hci_dev *hdev, u8 type, u16 interval, u16 window, - int timeout) -{ - struct le_scan_params *param = &hdev->le_scan_params; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) - return -ENOTSUPP; - - if (work_busy(&hdev->le_scan)) - return -EINPROGRESS; - - param->type = type; - param->interval = interval; - param->window = window; - param->timeout = timeout; - - queue_work(system_long_wq, &hdev->le_scan); - - return 0; -} - /* Alloc HCI device */ struct hci_dev *hci_alloc_dev(void) { @@ -2211,7 +2099,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); - INIT_WORK(&hdev->le_scan, le_scan_work); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); -- cgit v1.2.3 From b0434345f2a7330be5277b63606cff26a7965982 
Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:38 -0300 Subject: Bluetooth: Remove inquiry helpers This patch removes hci_do_inquiry and hci_cancel_inquiry helpers. We now use the HCI request framework in device discovery functionality and these helpers are no longer needed. Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 100539fcbfe..e2e9d409d0f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3501,36 +3501,6 @@ static void hci_cmd_work(struct work_struct *work) } } -int hci_do_inquiry(struct hci_dev *hdev, u8 length) -{ - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_cp_inquiry cp; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_INQUIRY, &hdev->flags)) - return -EINPROGRESS; - - hci_inquiry_cache_flush(hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = length; - - return hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); -} - -int hci_cancel_inquiry(struct hci_dev *hdev) -{ - BT_DBG("%s", hdev->name); - - if (!test_bit(HCI_INQUIRY, &hdev->flags)) - return -EALREADY; - - return hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL); -} - u8 bdaddr_to_le(u8 bdaddr_type) { switch (bdaddr_type) { -- cgit v1.2.3 From 8892d8beb37cb4ea531a5076946d5cc809b04c25 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:39 -0300 Subject: Bluetooth: Remove empty event handler This patch removes the hci_cc_le_set_scan_param event handler. This handler became empty because failures of this event are now handled by start_discovery_complete function in mgmt.c. 
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 76ff1af0569..db58e72316b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -929,14 +929,6 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } -static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - -} - static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2251,10 +2243,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_user_passkey_neg_reply(hdev, skb); break; - case HCI_OP_LE_SET_SCAN_PARAM: - hci_cc_le_set_scan_param(hdev, skb); - break; - case HCI_OP_LE_SET_ADV_ENABLE: hci_cc_le_set_adv_enable(hdev, skb); break; -- cgit v1.2.3 From 12602d0cc005354a519b3eba443d7912ab71313a Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Tue, 30 Apr 2013 15:29:40 -0300 Subject: Bluetooth: Mgmt Device Found Event We only want to send Mgmt Device Found Events if we are running the Device Discovery procedure (started by the MGMT Start Discovery Command). Inquiry or LE scanning triggered by HCI raw interface (e.g. hcitool) or kernel internals should not send Mgmt Device Found Events. 
Signed-off-by: Andre Guedes Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 69d17205745..7ae737fcf5e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4180,6 +4180,9 @@ int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, struct mgmt_ev_device_found *ev = (void *) buf; size_t ev_size; + if (!hci_discovery_active(hdev)) + return -EPERM; + /* Leave 5 bytes for a potential CoD field */ if (sizeof(*ev) + eir_len + 5 > sizeof(buf)) return -EINVAL; -- cgit v1.2.3 From 034cbea0931433cf88a1f79a385402604f08bd67 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 14 May 2013 11:44:16 +0300 Subject: Bluetooth: Use HCI_MGMT instead of HCI_LINK_KEYS flag Use HCI_MGMT flag instead of HCI_LINK_KEYS flag. There is a problem with HCI_LINK_KEYS flag since it is set only when link keys are loaded. Otherwise kernel assumes that old interface is used. 
Signed-off-by: Andrei Emeltchenko Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index db58e72316b..0437200d92f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2611,7 +2611,7 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s", hdev->name); - if (!test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) + if (!test_bit(HCI_MGMT, &hdev->dev_flags)) return; hci_dev_lock(hdev); @@ -2687,7 +2687,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_drop(conn); } - if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) + if (test_bit(HCI_MGMT, &hdev->dev_flags)) hci_add_link_key(hdev, conn, 1, &ev->bdaddr, ev->link_key, ev->key_type, pin_len); -- cgit v1.2.3 From 0a804654af62dfea4899c66561d74d72273b2921 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 14 May 2013 11:44:17 +0300 Subject: Bluetooth: Remove unneeded flag Remove HCI_LINK_KEYS flag since using HCI_MGMT is enough for test that user space expects the kernel managing link keys. 
Signed-off-by: Andrei Emeltchenko Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/mgmt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7ae737fcf5e..fedc5399d46 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1736,8 +1736,6 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, hci_link_keys_clear(hdev); - set_bit(HCI_LINK_KEYS, &hdev->dev_flags); - if (cp->debug_keys) set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags); else -- cgit v1.2.3 From 673e1dd7ed7701cac8c5c247d152fd3d2da2a4f1 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 13 May 2013 10:07:11 +0800 Subject: Bluetooth: hidp: using strlcpy instead of strncpy, also beautify code. For NULL terminated string, need always let it ended by zero. Since have already called memcpy() to initialize 'ci', so need not redundant initialization. Better use ''if(session->hid) {} else if(session->input) {}"" instead of ''if(session->hid) {}; if(session->input) {};'' Signed-off-by: Chen Gang Reviewed-by: David Herrmann Acked-by: Jiri Kosina Signed-off-by: Gustavo Padovan --- net/bluetooth/hidp/core.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 940f5acb669..f13a8da441a 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -76,25 +76,19 @@ static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo ci->flags = session->flags; ci->state = BT_CONNECTED; - ci->vendor = 0x0000; - ci->product = 0x0000; - ci->version = 0x0000; - if (session->input) { ci->vendor = session->input->id.vendor; ci->product = session->input->id.product; ci->version = session->input->id.version; if (session->input->name) - strncpy(ci->name, session->input->name, 128); + strlcpy(ci->name, session->input->name, 128); else - strncpy(ci->name, "HID Boot Device", 128); - } 
- - if (session->hid) { + strlcpy(ci->name, "HID Boot Device", 128); + } else if (session->hid) { ci->vendor = session->hid->vendor; ci->product = session->hid->product; ci->version = session->hid->version; - strncpy(ci->name, session->hid->name, 128); + strlcpy(ci->name, session->hid->name, 128); } } -- cgit v1.2.3 From b8f4e068004859eefac7b1ced59ddb67ca6d2d80 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Thu, 13 Jun 2013 12:34:31 +0100 Subject: Bluetooth: Improve comments on the HCI_Delete_Store_Link_Key issue Some Bluetooth controllers do not support this command, so we first need to check for its support before sending it. This patch adds a lengthy comment about this. Signed-off-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e2e9d409d0f..061523eb52a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -597,7 +597,15 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) struct hci_dev *hdev = req->hdev; u8 p; - /* Only send HCI_Delete_Stored_Link_Key if it is supported */ + /* Some Broadcom based Bluetooth controllers do not support the + * Delete Stored Link Key command. They are clearly indicating its + * absence in the bit mask of supported commands. + * + * Check the supported commands and only if the the command is marked + * as supported send it. If not supported assume that the controller + * does not have actual support for stored link keys which makes this + * command redundant anyway. 
+ */ if (hdev->commands[6] & 0x80) { struct hci_cp_delete_stored_link_key cp; -- cgit v1.2.3 From b33698e267ff17a97ecf3ee621a925bb356e9120 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 20 Jun 2013 16:30:00 +0800 Subject: ipv6: remove a useless pr_info() in addrconf_gre_config() This is debug info, should at least be pr_debug(), but given that this code is in upstream for two years, there is no need to keep this debugging printk any more, so just remove it. Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 80449121afa..90788a1c6bb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2760,8 +2760,6 @@ static void addrconf_gre_config(struct net_device *dev) struct inet6_dev *idev; struct in6_addr addr; - pr_info("%s(%s)\n", __func__, dev->name); - ASSERT_RTNL(); if ((idev = ipv6_find_idev(dev)) == NULL) { -- cgit v1.2.3 From b3a6dfe8178c5159e54117078134fef806a913ca Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 20 Jun 2013 17:20:30 +0800 Subject: VSOCK: Introduce vsock_auto_bind helper This piece of code is called three times, let's have a helper for it. Signed-off-by: Asias He Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 49 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3f77f42a3b5..b0b362ad051 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -165,6 +165,18 @@ static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; static DEFINE_SPINLOCK(vsock_table_lock); +/* Autobind this socket to the local address if necessary. 
*/ +static int vsock_auto_bind(struct vsock_sock *vsk) +{ + struct sock *sk = sk_vsock(vsk); + struct sockaddr_vm local_addr; + + if (vsock_addr_bound(&vsk->local_addr)) + return 0; + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + return __vsock_bind(sk, &local_addr); +} + static void vsock_init_tables(void) { int i; @@ -956,15 +968,10 @@ static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, lock_sock(sk); - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; + err = vsock_auto_bind(vsk); + if (err) + goto out; - } /* If the provided message contains an address, use that. Otherwise * fall back on the socket's remote handle (if it has been connected). @@ -1038,15 +1045,9 @@ static int vsock_dgram_connect(struct socket *sock, lock_sock(sk); - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; - - } + err = vsock_auto_bind(vsk); + if (err) + goto out; if (!transport->dgram_allow(remote_addr->svm_cid, remote_addr->svm_port)) { @@ -1163,17 +1164,9 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); - /* Autobind this socket to the local address if necessary. 
+ */ - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, - VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; - - } + err = vsock_auto_bind(vsk); + if (err) + goto out; sk->sk_state = SS_CONNECTING; -- cgit v1.2.3 From dce1a2877778fee172ab74411fcabd77bceb8e12 Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 20 Jun 2013 17:20:31 +0800 Subject: VSOCK: Return VMCI_ERROR_NO_MEM when fails to allocate skb vmci_transport_recv_dgram_cb always returns VMCI_SUCCESS even if we fail to allocate skb, return VMCI_ERROR_NO_MEM instead. Signed-off-by: Asias He Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index daff75200e2..99b511ddb4c 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -625,13 +625,14 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) /* Attach the packet to the socket's receive queue as an sk_buff. */ skb = alloc_skb(size, GFP_ATOMIC); - if (skb) { - /* sk_receive_skb() will do a sock_put(), so hold here. */ - sock_hold(sk); - skb_put(skb, size); - memcpy(skb->data, dg, size); - sk_receive_skb(sk, skb, 0); - } + if (!skb) + return VMCI_ERROR_NO_MEM; + + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); return VMCI_SUCCESS; } -- cgit v1.2.3 From 0fc932467688e1c81fc109a93f323cef4993dc24 Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 20 Jun 2013 17:20:32 +0800 Subject: VSOCK: Remove unnecessary label Signed-off-by: Asias He Acked-by: Andy King Signed-off-by: David S. 
Miller --- net/vmw_vsock/vmci_transport.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 99b511ddb4c..ffc11df02af 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -940,10 +940,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) * reset to prevent that. */ vmci_transport_send_reset(sk, pkt); - goto out; + break; } -out: release_sock(sk); kfree(recv_pkt_info); /* Release reference obtained in the stream callback when we fetched -- cgit v1.2.3 From a49dd9dcb50195b35a5e59eb65b8e56584874630 Mon Sep 17 00:00:00 2001 From: Asias He Date: Thu, 20 Jun 2013 17:20:33 +0800 Subject: VSOCK: Fix VSOCK_HASH and VSOCK_CONN_HASH If we mod with VSOCK_HASH_SIZE -1, we get 0, 1, .... 249. Actually, we have vsock_bind_table[0 ... 250] and vsock_connected_table[0 .. 250]. In this case the last entry will never be used. We should mod with VSOCK_HASH_SIZE instead. Signed-off-by: Asias He Acked-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b0b362ad051..593071dabd1 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -144,18 +144,18 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function - * mods with VSOCK_HASH_SIZE - 1 to ensure this. + * mods with VSOCK_HASH_SIZE to ensure this. 
*/ #define VSOCK_HASH_SIZE 251 #define MAX_PORT_RETRIES 24 -#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) /* XXX This can probably be implemented in a better way. */ #define VSOCK_CONN_HASH(src, dst) \ - (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) + (((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE) #define vsock_connected_sockets(src, dst) \ (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) #define vsock_connected_sockets_vsk(vsk) \ -- cgit v1.2.3 From 60877a32bce00041528576e6b8df5abe9251fa73 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jun 2013 01:15:51 -0700 Subject: net: allow large number of tx queues netif_alloc_netdev_queues() uses kcalloc() to allocate memory for the "struct netdev_queue *_tx" array. For large number of tx queues, kcalloc() might fail, so this patch does a fallback to vzalloc(). As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT to kzalloc() flags to do this fallback only when really needed. Signed-off-by: Eric Dumazet Acked-by: Michael S. Tsirkin Signed-off-by: David S. 
Miller --- net/core/dev.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fa007dba6be..722f633926e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -130,6 +130,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -5253,17 +5254,28 @@ static void netdev_init_one_queue(struct net_device *dev, #endif } +static void netif_free_tx_queues(struct net_device *dev) +{ + if (is_vmalloc_addr(dev->_tx)) + vfree(dev->_tx); + else + kfree(dev->_tx); +} + static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; + size_t sz = count * sizeof(*tx); - BUG_ON(count < 1); - - tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) - return -ENOMEM; + BUG_ON(count < 1 || count > 0xffff); + tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!tx) { + tx = vzalloc(sz); + if (!tx) + return -ENOMEM; + } dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -5811,7 +5823,7 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); - kfree(dev->_tx); + netif_free_tx_queues(dev); #ifdef CONFIG_RPS kfree(dev->_rx); #endif @@ -5836,7 +5848,7 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); - kfree(dev->_tx); + netif_free_tx_queues(dev); #ifdef CONFIG_RPS kfree(dev->_rx); #endif -- cgit v1.2.3 From 7c77602f57da3f526fa7cf7bb02c49d3397c0729 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 21 Jun 2013 15:37:25 +0800 Subject: bridge: fix a typo in comments Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 31952a10394..81befac015e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1012,7 +1012,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, #endif /* - * Add port to rotuer_list + * Add port to router_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ -- cgit v1.2.3 From 479b1a5825f68f9b63d26a13ca25ffbb7d2617ad Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 20 Jun 2013 15:08:14 -0700 Subject: openvswitch: Use correct config guard. This bug was introduced by commit aa310701e787087 (openvswitch: Add gre tunnel support.) Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 3a8d1900aa7..943e5c43135 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -16,7 +16,7 @@ * 02110-1301, USA */ -#ifdef CONFIG_NET_IPGRE_DEMUX +#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index f52dfb9cb5a..ba81294219a 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -39,7 +39,7 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, -#ifdef CONFIG_NET_IPGRE_DEMUX +#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) &ovs_gre_vport_ops, #endif }; -- cgit v1.2.3 From aeb193ea6cef28e33589de05ef932424f8e19bde Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sun, 23 Jun 2013 23:33:48 -0700 Subject: net: Unmap fragment page once iterator is done Callers of skb_seq_read() are currently forced to call 
skb_abort_seq_read() even when consuming all the data because the last call to skb_seq_read (the one that returns 0 to indicate the end) fails to unmap the last fragment page. With this patch callers will be allowed to traverse the SKB data by calling skb_prepare_seq_read() once and repeatedly calling skb_seq_read() as originally intended (and documented in the original commit 677e90eda), that is, only call skb_abort_seq_read() if the sequential read is actually aborted. Signed-off-by: Wedson Almeida Filho Signed-off-by: David S. Miller --- net/batman-adv/main.c | 1 - net/core/skbuff.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 51aafd669cb..08125f3f606 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -473,7 +473,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) crc = crc32c(crc, data, len); consumed += len; } - skb_abort_seq_read(&st); return htonl(crc); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index edf37578e21..9f73eca29fb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2541,8 +2541,13 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, unsigned int block_limit, abs_offset = consumed + st->lower_offset; skb_frag_t *frag; - if (unlikely(abs_offset >= st->upper_offset)) + if (unlikely(abs_offset >= st->upper_offset)) { + if (st->frag_data) { + kunmap_atomic(st->frag_data); + st->frag_data = NULL; + } return 0; + } next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; -- cgit v1.2.3 From 6f390908e58113b9199424749c32a05181ff69d9 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 19 Jun 2013 14:06:25 -0700 Subject: wireless: Make sure __cfg80211_connect_result always puts bss Otherwise, we can leak a bss reference. 
Signed-off-by: Ben Greear Signed-off-by: Johannes Berg --- net/wireless/sme.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index c0bf781d4fb..32dac8cdd2e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -557,6 +557,7 @@ static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); * SME event handling */ +/* This method must consume bss one way or another */ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, @@ -572,8 +573,10 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, ASSERT_WDEV_LOCK(wdev); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && - wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) { + cfg80211_put_bss(wdev->wiphy, bss); return; + } nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, -- cgit v1.2.3 From 0e3a39b5620bc84f25ffb0592b05b0350e8b0520 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 19 Jun 2013 14:06:27 -0700 Subject: wireless: add comments about bss refcounting Should help the next person that tries to understand the bss refcounting logic. Signed-off-by: Ben Greear Signed-off-by: Johannes Berg --- net/wireless/scan.c | 4 ++++ net/wireless/sme.c | 3 +++ 2 files changed, 7 insertions(+) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index dd01b58fa78..ae8c186b50d 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -523,6 +523,7 @@ static int cmp_bss(struct cfg80211_bss *a, } } +/* Returned bss is reference counted and must be cleaned up appropriately. 
*/ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, @@ -678,6 +679,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, return true; } +/* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *tmp) @@ -866,6 +868,7 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, return channel; } +/* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss* cfg80211_inform_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -923,6 +926,7 @@ cfg80211_inform_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_inform_bss); +/* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss * cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *channel, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 32dac8cdd2e..1d3cfb1a3f2 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -239,6 +239,7 @@ void cfg80211_conn_work(struct work_struct *work) rtnl_unlock(); } +/* Returned bss is reference counted and must be cleaned up appropriately. 
*/ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -699,6 +700,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } EXPORT_SYMBOL(cfg80211_connect_result); +/* Consumes bss object one way or another */ void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, @@ -775,6 +777,7 @@ void cfg80211_roamed(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_roamed); +/* Consumes bss object one way or another */ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, -- cgit v1.2.3 From f9bef3df52fe61067e4c1c6cfb2037cb6b259a6a Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 19 Jun 2013 14:06:26 -0700 Subject: wireless: check for dangling wdev->current_bss pointer If it *is* still set when the netdev is being deleted, then we are about to leak a pointer. Warn and clean up in that case. Signed-off-by: Ben Greear Signed-off-by: Johannes Berg --- net/wireless/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 4224e7554a7..672459b9483 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -934,6 +934,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * freed. 
*/ cfg80211_process_wdev_events(wdev); + + if (WARN_ON(wdev->current_bss)) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + wdev->current_bss = NULL; + } break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) -- cgit v1.2.3 From a33d402610d2d3a422136defe8237f4ddfb69fd9 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 23 Jun 2013 12:51:21 +0200 Subject: cfg80211: fix compilation warning for cfg80211_leave_all() The following compilation issue popped up moving from v3.10-rc1 to v3.10-rc6 after merging wireless-testing. net/wireless/sysfs.c:86:13: error: 'cfg80211_leave_all' defined but not used [-Werror=unused-function] The function is only called when CONFIG_PM is enabled. Moving the function under CONFIG_PM as well. Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 360a42c6f69..a23253e0635 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -83,6 +83,7 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +#ifdef CONFIG_PM static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) { struct wireless_dev *wdev; @@ -91,7 +92,6 @@ static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) cfg80211_leave(rdev, wdev); } -#ifdef CONFIG_PM static int wiphy_suspend(struct device *dev, pm_message_t state) { struct cfg80211_registered_device *rdev = dev_to_rdev(dev); -- cgit v1.2.3 From ac49e1a8969eeb819c4fc2eced9ee9ef9f35a4a9 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 20 Jun 2013 23:50:58 -0700 Subject: mac80211: allow self-protected frame tx without sta Useful for userspace mesh to authenticate and peer without a station entry, since both steps may fail anyway. 
Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 93120de776f..8184d121ff0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2827,7 +2827,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, !rcu_access_pointer(sdata->bss->beacon)) need_offchan = true; if (!ieee80211_is_action(mgmt->frame_control) || - mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) + mgmt->u.action.category == WLAN_CATEGORY_PUBLIC || + mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED) break; rcu_read_lock(); sta = sta_info_get(sdata, mgmt->da); -- cgit v1.2.3 From 6c7c4cbfd5f59c04a40af67ad72d14e19215ef36 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 20 Jun 2013 23:50:59 -0700 Subject: mac80211: initialize power mode for mesh STAs Previously the default mesh STA nonpeer power mode was UNKNOWN (0) make the default mesh STA power mode ACTIVE, to prevent unnecessary frame buffering while peering is not yet complete. Fixes a panic in ath9k_htc when adding stations from userspace, and mcast buffered frames are later released. Thanks to Bob Copeland for his help debugging this. 
Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index aaf68d29722..aeb967a0aee 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -347,6 +347,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.user_mpm) init_timer(&sta->plink_timer); + sta->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; #endif memcpy(sta->sta.addr, addr, ETH_ALEN); -- cgit v1.2.3 From bcbde0d449eda7afa8f63280b165c8300dbd00e2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jun 2013 19:38:07 +0200 Subject: net: netlink: virtual tap device management Similarly to the networking receive path with ptype_all taps, we add the possibility to register netdevices that are for ARPHRD_NETLINK to the netlink subsystem, so that those can be used for netlink analyzers resp. debuggers. We do not offer a direct callback function as out-of-tree modules could do crap with it. Instead, a netdevice must be registered properly and only receives a clone, managed by the netlink layer. Symbols are exported as GPL-only. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 275d901d7e4..6967fbcca6c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -101,6 +102,9 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); static ATOMIC_NOTIFIER_HEAD(netlink_chain); +static DEFINE_SPINLOCK(netlink_tap_lock); +static struct list_head netlink_tap_all __read_mostly; + static inline u32 netlink_group_mask(u32 group) { return group ? 
1 << (group - 1) : 0; @@ -111,6 +115,100 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } +int netlink_add_tap(struct netlink_tap *nt) +{ + if (unlikely(nt->dev->type != ARPHRD_NETLINK)) + return -EINVAL; + + spin_lock(&netlink_tap_lock); + list_add_rcu(&nt->list, &netlink_tap_all); + spin_unlock(&netlink_tap_lock); + + if (nt->module) + __module_get(nt->module); + + return 0; +} +EXPORT_SYMBOL_GPL(netlink_add_tap); + +int __netlink_remove_tap(struct netlink_tap *nt) +{ + bool found = false; + struct netlink_tap *tmp; + + spin_lock(&netlink_tap_lock); + + list_for_each_entry(tmp, &netlink_tap_all, list) { + if (nt == tmp) { + list_del_rcu(&nt->list); + found = true; + goto out; + } + } + + pr_warn("__netlink_remove_tap: %p not found\n", nt); +out: + spin_unlock(&netlink_tap_lock); + + if (found && nt->module) + module_put(nt->module); + + return found ? 0 : -ENODEV; +} +EXPORT_SYMBOL_GPL(__netlink_remove_tap); + +int netlink_remove_tap(struct netlink_tap *nt) +{ + int ret; + + ret = __netlink_remove_tap(nt); + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL_GPL(netlink_remove_tap); + +static int __netlink_deliver_tap_skb(struct sk_buff *skb, + struct net_device *dev) +{ + struct sk_buff *nskb; + int ret = -ENOMEM; + + dev_hold(dev); + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + nskb->dev = dev; + ret = dev_queue_xmit(nskb); + if (unlikely(ret > 0)) + ret = net_xmit_errno(ret); + } + + dev_put(dev); + return ret; +} + +static void __netlink_deliver_tap(struct sk_buff *skb) +{ + int ret; + struct netlink_tap *tmp; + + list_for_each_entry_rcu(tmp, &netlink_tap_all, list) { + ret = __netlink_deliver_tap_skb(skb, tmp->dev); + if (unlikely(ret)) + break; + } +} + +static void netlink_deliver_tap(struct sk_buff *skb) +{ + rcu_read_lock(); + + if (unlikely(!list_empty(&netlink_tap_all))) + __netlink_deliver_tap(skb); + + rcu_read_unlock(); +} + static void 
netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -1518,6 +1616,8 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; + netlink_deliver_tap(skb); + #ifdef CONFIG_NETLINK_MMAP if (netlink_skb_is_mmaped(skb)) netlink_queue_mmaped_skb(sk, skb); @@ -1578,6 +1678,11 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { + /* We could do a netlink_deliver_tap(skb) here as well + * but since this is intended for the kernel only, we + * should rather let it stay under the hood. + */ + ret = skb->len; netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; @@ -2975,6 +3080,8 @@ static int __init netlink_proto_init(void) nl_table[i].compare = netlink_compare; } + INIT_LIST_HEAD(&netlink_tap_all); + netlink_add_usersock_entry(); sock_register(&netlink_family_ops); -- cgit v1.2.3 From 6da334ee0c101fc5ecf62f2b1e11b1524be7b159 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Jun 2013 01:30:11 -0700 Subject: ipv6: add include file to suppress sparse warnings commit f88c91ddba95 ("ipv6: statically link register_inet6addr_notifier()") added the following sparse warnings: net/ipv6/addrconf_core.c:83:5: warning: symbol 'register_inet6addr_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:89:5: warning: symbol 'unregister_inet6addr_notifier' was not declared. Should it be static? net/ipv6/addrconf_core.c:95:5: warning: symbol 'inet6addr_notifier_call_chain' was not declared. Should it be static? Signed-off-by: Eric Dumazet Cc: Cong Wang Signed-off-by: David S.
Miller --- net/ipv6/addrconf_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 72104562c86..d2f87427244 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -5,6 +5,7 @@ #include #include +#include #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) -- cgit v1.2.3 From f693dff7107063f0ce08502052b78c4d4feb0e87 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 25 Jun 2013 16:01:55 +0300 Subject: rtnetlink: allow using zero MAC address in rtnl_fdb_{add,del} This is required for multiple default destinations management in VXLAN Signed-off-by: Mike Rapoport Signed-off-by: Stephen Hemminger --- net/core/rtnetlink.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9007533867f..3de740834d1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2109,10 +2109,6 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) } addr = nla_data(tb[NDA_LLADDR]); - if (is_zero_ether_addr(addr)) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); - return -EINVAL; - } err = -EOPNOTSUPP; @@ -2210,10 +2206,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) } addr = nla_data(tb[NDA_LLADDR]); - if (is_zero_ether_addr(addr)) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); - return -EINVAL; - } err = -EOPNOTSUPP; -- cgit v1.2.3 From b7b1bfce0bb68bd8f6e62a28295922785cc63781 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 23 Jun 2013 18:39:01 +0200 Subject: ipv6: split duplicate address detection and router solicitation timer This patch splits the timers for duplicate address detection and router solicitations apart. The router solicitations timer goes into inet6_dev and the dad timer stays in inet6_ifaddr. 
The reason behind this patch is to reduce the number of unneeded router solicitations sent out by the host if additional link-local addresses are created. Currently we send out RS for every link-local address on an interface. If the RS timer fires we pick a source address with ipv6_get_lladdr. This change could hurt people adding additional link-local addresses and specifying these addresses in the radvd clients section because we no longer guarantee that we use every ll address as source address in router solicitations. Cc: Flavio Leitner Cc: Hideaki YOSHIFUJI Cc: David Stevens Signed-off-by: Hannes Frederic Sowa Reviewed-by: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 164 +++++++++++++++++++++++++++++----------------------- 1 file changed, 91 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 90788a1c6bb..c06bc76280b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -253,37 +253,32 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev) return !qdisc_tx_is_noop(dev); } -static void addrconf_del_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_rs_timer(struct inet6_dev *idev) { - if (del_timer(&ifp->timer)) + if (del_timer(&idev->rs_timer)) + __in6_dev_put(idev); +} + +static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +{ + if (del_timer(&ifp->dad_timer)) __in6_ifa_put(ifp); } -enum addrconf_timer_t { - AC_NONE, - AC_DAD, - AC_RS, -}; +static void addrconf_mod_rs_timer(struct inet6_dev *idev, + unsigned long when) +{ + if (!timer_pending(&idev->rs_timer)) + in6_dev_hold(idev); + mod_timer(&idev->rs_timer, jiffies + when); +} -static void addrconf_mod_timer(struct inet6_ifaddr *ifp, - enum addrconf_timer_t what, - unsigned long when) +static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, + unsigned long when) { - if (!del_timer(&ifp->timer)) + if (!timer_pending(&ifp->dad_timer)) in6_ifa_hold(ifp); - - switch (what) { - case
AC_DAD: - ifp->timer.function = addrconf_dad_timer; - break; - case AC_RS: - ifp->timer.function = addrconf_rs_timer; - break; - default: - break; - } - ifp->timer.expires = jiffies + when; - add_timer(&ifp->timer); + mod_timer(&ifp->dad_timer, jiffies + when); } static int snmp6_alloc_dev(struct inet6_dev *idev) @@ -326,6 +321,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) WARN_ON(!list_empty(&idev->addr_list)); WARN_ON(idev->mc_list != NULL); + WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); @@ -357,7 +353,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; INIT_LIST_HEAD(&ndev->addr_list); - + setup_timer(&ndev->rs_timer, addrconf_rs_timer, + (unsigned long)ndev); memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; @@ -776,7 +773,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->timer)) + if (del_timer(&ifp->dad_timer)) pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { @@ -869,9 +866,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - init_timer(&ifa->timer); + setup_timer(&ifa->dad_timer, addrconf_dad_timer, + (unsigned long)ifa); INIT_HLIST_NODE(&ifa->addr_lst); - ifa->timer.data = (unsigned long) ifa; ifa->scope = scope; ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; @@ -994,7 +991,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } write_unlock_bh(&idev->lock); - addrconf_del_timer(ifp); + addrconf_del_dad_timer(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); @@ -1447,6 +1444,23 @@ try_nextdev: } EXPORT_SYMBOL(ipv6_dev_get_saddr); +static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, + unsigned char 
banned_flags) +{ + struct inet6_ifaddr *ifp; + int err = -EADDRNOTAVAIL; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + *addr = ifp->addr; + err = 0; + break; + } + } + return err; +} + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) { @@ -1456,17 +1470,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - struct inet6_ifaddr *ifp; - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (ifp->scope == IFA_LINK && - !(ifp->flags & banned_flags)) { - *addr = ifp->addr; - err = 0; - break; - } - } + err = __ipv6_get_lladdr(idev, addr, banned_flags); read_unlock_bh(&idev->lock); } rcu_read_unlock(); @@ -1580,7 +1585,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_timer(ifp); + addrconf_del_dad_timer(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -3036,7 +3041,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_timer(ifa); + addrconf_del_dad_timer(ifa); goto restart; } } @@ -3045,6 +3050,8 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); + addrconf_del_rs_timer(idev); + /* Step 2: clear flags for stateless addrconf */ if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); @@ -3074,7 +3081,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_timer(ifa); + addrconf_del_dad_timer(ifa); list_del(&ifa->if_list); @@ -3116,10 +3123,10 @@ static int addrconf_ifdown(struct net_device 
*dev, int how) static void addrconf_rs_timer(unsigned long data) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; - struct inet6_dev *idev = ifp->idev; + struct inet6_dev *idev = (struct inet6_dev *)data; + struct in6_addr lladdr; - read_lock(&idev->lock); + write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) goto out; @@ -3130,18 +3137,19 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - spin_lock(&ifp->lock); - if (ifp->probes++ < idev->cnf.rtr_solicits) { - /* The wait after the last probe can be shorter */ - addrconf_mod_timer(ifp, AC_RS, - (ifp->probes == idev->cnf.rtr_solicits) ? - idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); - spin_unlock(&ifp->lock); + if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (!__ipv6_get_lladdr(idev, &lladdr, IFA_F_TENTATIVE)) + ndisc_send_rs(idev->dev, &lladdr, + &in6addr_linklocal_allrouters); + else + goto out; - ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + /* The wait after the last probe can be shorter */ + addrconf_mod_rs_timer(idev, (idev->rs_probes == + idev->cnf.rtr_solicits) ? + idev->cnf.rtr_solicit_delay : + idev->cnf.rtr_solicit_interval); } else { - spin_unlock(&ifp->lock); /* * Note: we do not support deprecated "all on-link" * assumption any longer. @@ -3150,8 +3158,8 @@ static void addrconf_rs_timer(unsigned long data) } out: - read_unlock(&idev->lock); - in6_ifa_put(ifp); + write_unlock(&idev->lock); + in6_dev_put(idev); } /* @@ -3167,8 +3175,8 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) else rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? 
: 1); - ifp->probes = idev->cnf.dad_transmits; - addrconf_mod_timer(ifp, AC_DAD, rand_num); + ifp->dad_probes = idev->cnf.dad_transmits; + addrconf_mod_dad_timer(ifp, rand_num); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) @@ -3229,40 +3237,40 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; - if (!ifp->probes && addrconf_dad_end(ifp)) + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - read_lock(&idev->lock); + write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - read_unlock(&idev->lock); + write_unlock(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); goto out; } - if (ifp->probes == 0) { + if (ifp->dad_probes == 0) { /* * DAD was successful */ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); addrconf_dad_completed(ifp); goto out; } - ifp->probes--; - addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); + ifp->dad_probes--; + addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time); spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); @@ -3274,6 +3282,9 @@ out: static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { struct net_device *dev = ifp->idev->dev; + struct in6_addr lladdr; + + addrconf_del_dad_timer(ifp); /* * Configure the address for reception. Now it is valid. @@ -3294,13 +3305,20 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) * [...] as part of DAD [...] 
there is no need * to delay again before sending the first RS */ - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) + ndisc_send_rs(dev, &lladdr, + &in6addr_linklocal_allrouters); + else + return; - spin_lock_bh(&ifp->lock); - ifp->probes = 1; + write_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); - spin_unlock_bh(&ifp->lock); + addrconf_mod_rs_timer(ifp->idev, + ifp->idev->cnf.rtr_solicit_interval); + spin_unlock(&ifp->lock); + write_unlock_bh(&ifp->idev->lock); } } -- cgit v1.2.3 From 876fd05ddbae03166e7037fca957b55bb3be6594 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 24 Jun 2013 00:22:20 +0200 Subject: ipv6: don't disable interface if last ipv6 address is removed The reason behind this change is that as soon as we delete the last ipv6 address of an interface we also lose the /proc/sys/net/ipv6/conf/ directory. This seems to be a usability problem for me. I don't see any reason why we should shutdown ipv6 on that interface in such cases. Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c06bc76280b..e799a8838ed 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2507,12 +2507,6 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p read_unlock_bh(&idev->lock); ipv6_del_addr(ifp); - - /* If the last address is deleted administratively, - disable IPv6 on this interface. 
- */ - if (list_empty(&idev->addr_list)) - addrconf_ifdown(idev->dev, 1); return 0; } } -- cgit v1.2.3 From 2b9651d72d3fc1a9053ae1a323f8407e1f63b436 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 24 Jun 2013 22:03:28 +0200 Subject: ipv6: remove old token ipv6 address as soon as possible If the tokenized ip address is re-set on an interface we depend on the arrival of a new router advertisement to call addrconf_verify to clean up the old address (whose valid_lft is now set to 0). Old addresses can linger around for a longer time if e.g. the source of router advertisements vanishes. So, call addrconf_verify immediately after setting the new tokenized address to get rid of the old tokenized addresses. Cc: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e799a8838ed..afaf3cdadf5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4375,6 +4375,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); + addrconf_verify(0); return 0; } -- cgit v1.2.3 From 52db882f3fc2903014e638ee91e690085fe37fdb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 Jun 2013 18:17:27 +0200 Subject: net: sctp: migrate cookie life from timeval to ktime Currently, SCTP code defines its own timeval functions (since timeval is rarely used inside the kernel by others), namely tv_lt() and TIMEVAL_ADD() macros, that operate on SCTP cookie expiration.
We might as well remove all those, and operate directly on ktime structures for a couple of reasons: ktime is available on all archs; complexity of ktime calculations depending on the arch is less than (reduces to a simple arithmetic operations on archs with BITS_PER_LONG == 64 or CONFIG_KTIME_SCALAR) or equal to timeval functions (other archs); code becomes more readable; macros can be thrown out. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 8 +------- net/sctp/sm_make_chunk.c | 19 ++++++++----------- net/sctp/socket.c | 14 +++----------- 3 files changed, 12 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index bf6e6bd553c..9a383a8774e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -102,13 +102,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); asoc->state = SCTP_STATE_CLOSED; - - /* Set these values from the socket values, a conversion between - * millsecons to seconds/microseconds must also be done. - */ - asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; - asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) - * 1000; + asoc->cookie_life = ms_to_ktime(sp->assocparams.sasoc_cookie_life); asoc->frag_point = 0; asoc->user_frag = sp->user_frag; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fc8548743ed..dd71f1f9ba1 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1630,8 +1630,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, cookie->c.adaptation_ind = asoc->peer.adaptation_ind; /* Set an expiration time for the cookie. 
*/ - do_gettimeofday(&cookie->c.expiration); - TIMEVAL_ADD(asoc->cookie_life, cookie->c.expiration); + cookie->c.expiration = ktime_add(asoc->cookie_life, + ktime_get()); /* Copy the peer's init packet. */ memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr, @@ -1680,7 +1680,7 @@ struct sctp_association *sctp_unpack_cookie( unsigned int len; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; - struct timeval tv; + ktime_t kt; struct hash_desc desc; /* Header size is static data prior to the actual cookie, including @@ -1757,11 +1757,11 @@ no_hmac: * down the new association establishment instead of every packet. */ if (sock_flag(ep->base.sk, SOCK_TIMESTAMP)) - skb_get_timestamp(skb, &tv); + kt = skb_get_ktime(skb); else - do_gettimeofday(&tv); + kt = ktime_get(); - if (!asoc && tv_lt(bear_cookie->expiration, tv)) { + if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) { /* * Section 3.3.10.3 Stale Cookie Error (3) * @@ -1773,9 +1773,7 @@ no_hmac: len = ntohs(chunk->chunk_hdr->length); *errp = sctp_make_op_error_space(asoc, chunk, len); if (*errp) { - suseconds_t usecs = (tv.tv_sec - - bear_cookie->expiration.tv_sec) * 1000000L + - tv.tv_usec - bear_cookie->expiration.tv_usec; + suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration)); __be32 n = htonl(usecs); sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, @@ -2514,8 +2512,7 @@ do_addr_param: /* Suggested Cookie Life span increment's unit is msec, * (1/1000sec). 
*/ - asoc->cookie_life.tv_sec += stale / 1000; - asoc->cookie_life.tv_usec += (stale % 1000) * 1000; + asoc->cookie_life = ktime_add_ms(asoc->cookie_life, stale); break; case SCTP_PARAM_HOST_NAME_ADDRESS: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 32db19ba4a2..4c47e5578d7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2910,13 +2910,8 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig asoc->max_retrans = assocparams.sasoc_asocmaxrxt; } - if (assocparams.sasoc_cookie_life != 0) { - asoc->cookie_life.tv_sec = - assocparams.sasoc_cookie_life / 1000; - asoc->cookie_life.tv_usec = - (assocparams.sasoc_cookie_life % 1000) - * 1000; - } + if (assocparams.sasoc_cookie_life != 0) + asoc->cookie_life = ms_to_ktime(assocparams.sasoc_cookie_life); } else { /* Set the values to the endpoint */ struct sctp_sock *sp = sctp_sk(sk); @@ -5074,10 +5069,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, assocparams.sasoc_asocmaxrxt = asoc->max_retrans; assocparams.sasoc_peer_rwnd = asoc->peer.rwnd; assocparams.sasoc_local_rwnd = asoc->a_rwnd; - assocparams.sasoc_cookie_life = (asoc->cookie_life.tv_sec - * 1000) + - (asoc->cookie_life.tv_usec - / 1000); + assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life); list_for_each(pos, &asoc->peer.transport_addr_list) { cnt ++; -- cgit v1.2.3 From b527fe693304d244b6103dc9f8a87150e71c29f7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 Jun 2013 18:17:28 +0200 Subject: net: sctp: minor: sctp_seq_dump_local_addrs add missing newline A trailing newline has been forgotten to add into the WARN(). Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 0c83162a6bf..62526c47705 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -138,7 +138,7 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo peer = asoc->peer.primary_path; if (unlikely(peer == NULL)) { - WARN(1, "Association %p with NULL primary path!", asoc); + WARN(1, "Association %p with NULL primary path!\n", asoc); return; } -- cgit v1.2.3 From 0a2fbac197441ebeafbbef09d4bbc0b5e73716d7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 Jun 2013 18:17:29 +0200 Subject: net: sctp: decouple cleaning some socket data from endpoint Rather instead of having the endpoint clean the garbage from the socket, use a sk_destruct handler sctp_destruct_sock(), that does the job for that when there are no more references on the socket. At least do this for our crypto transform through crypto_free_hash() that is allocated when in listening state. Also, perform sctp_put_port() only when sk is valid. At a later point in time we can still determine if there's an option of placing this into sk_prot->unhash() or sctp_endpoint_free() without any races. For now, leave it in sctp_endpoint_destroy() though. Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 19 ++++++++++--------- net/sctp/socket.c | 16 +++++++++++++++- 2 files changed, 25 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index a8b26741c0a..b26999d508b 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -247,10 +247,9 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) /* Final destructor for endpoint. */ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { - SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); + struct sock *sk; - /* Free up the HMAC transform. 
*/ - crypto_free_hash(sctp_sk(ep->base.sk)->hmac); + SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); /* Free the digest buffer */ kfree(ep->digest); @@ -271,13 +270,15 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) memset(ep->secret_key, 0, sizeof(ep->secret_key)); - /* Remove and free the port */ - if (sctp_sk(ep->base.sk)->bind_hash) - sctp_put_port(ep->base.sk); - /* Give up our hold on the sock. */ - if (ep->base.sk) - sock_put(ep->base.sk); + sk = ep->base.sk; + if (sk != NULL) { + /* Remove and free the port */ + if (sctp_sk(sk)->bind_hash) + sctp_put_port(sk); + + sock_put(sk); + } kfree(ep); SCTP_DBG_OBJCNT_DEC(ep); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4c47e5578d7..ba9359c20c8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -93,6 +93,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); static void sctp_wait_for_close(struct sock *sk, long timeo); +static void sctp_destruct_sock(struct sock *sk); static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, union sctp_addr *addr, int len); static int sctp_bindx_add(struct sock *, struct sockaddr *, int); @@ -3966,6 +3967,8 @@ static int sctp_init_sock(struct sock *sk) sp->hmac = NULL; + sk->sk_destruct = sctp_destruct_sock; + SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); @@ -4008,6 +4011,17 @@ static void sctp_destroy_sock(struct sock *sk) local_bh_enable(); } +/* Triggered when there are no references on the socket anymore */ +static void sctp_destruct_sock(struct sock *sk) +{ + struct sctp_sock *sp = sctp_sk(sk); + + /* Free up the HMAC transform. 
*/ + crypto_free_hash(sp->hmac); + + inet_sock_destruct(sk); +} + /* API 4.1.7 shutdown() - TCP Style Syntax * int shutdown(int socket, int how); * @@ -6848,7 +6862,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_reuse = sk->sk_reuse; newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = inet_sock_destruct; + newsk->sk_destruct = sctp_destruct_sock; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; -- cgit v1.2.3 From 62208f12451f723cd9e9f1d6d22866a61545e488 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 25 Jun 2013 18:17:30 +0200 Subject: net: sctp: simplify sctp_get_port No need to have an extra ret variable when we directly can return the value of sctp_get_port_local(). Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ba9359c20c8..66fcdcfe1b7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6036,7 +6036,6 @@ fail: */ static int sctp_get_port(struct sock *sk, unsigned short snum) { - long ret; union sctp_addr addr; struct sctp_af *af = sctp_sk(sk)->pf->af; @@ -6045,9 +6044,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) addr.v4.sin_port = htons(snum); /* Note: sk->sk_num gets filled in if ephemeral port request. */ - ret = sctp_get_port_local(sk, &addr); - - return ret ? 1 : 0; + return !!sctp_get_port_local(sk, &addr); } /* -- cgit v1.2.3 From 2d48d67fa8cd129ea85ea02d91b4a793286866f8 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 24 Jun 2013 10:28:03 +0300 Subject: net: poll/select low latency socket support select/poll busy-poll support. Split sysctl value into two separate ones, one for read and one for poll. updated Documentation/sysctl/net.txt Add a new poll flag POLL_LL. 
When this flag is set, sock_poll will call sk_poll_ll if possible. sock_poll sets this flag in its return value to indicate to select/poll when a socket that can busy poll is found. When poll/select have nothing to report, call the low-level sock_poll again until we are out of time or we find something. Once the system call finds something, it stops setting POLL_LL, so it can return the result to the user ASAP. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- net/core/sysctl_net_core.c | 8 ++++++++ net/socket.c | 14 +++++++++++++- 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 1e744b12fda..b6c619f4d47 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2307,7 +2307,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) #ifdef CONFIG_NET_LL_RX_POLL sk->sk_napi_id = 0; - sk->sk_ll_usec = sysctl_net_ll_poll; + sk->sk_ll_usec = sysctl_net_ll_read; #endif /* diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 62702c2053d..afc677eadd9 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -306,6 +306,14 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "low_latency_read", + .data = &sysctl_net_ll_read, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +# #endif #endif /* CONFIG_NET */ { diff --git a/net/socket.c b/net/socket.c index 3eec3f76b49..4da14cbd49b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -107,6 +107,7 @@ #include #ifdef CONFIG_NET_LL_RX_POLL +unsigned int sysctl_net_ll_read __read_mostly; unsigned int sysctl_net_ll_poll __read_mostly; #endif @@ -1147,13 +1148,24 @@ EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) { + unsigned int ll_flag = 0; struct socket *sock; /* * We can't return errors to poll, so 
it's either yes or no. */ sock = file->private_data; - return sock->ops->poll(file, sock, wait); + + if (sk_valid_ll(sock->sk)) { + /* this socket can poll_ll so tell the system call */ + ll_flag = POLL_LL; + + /* once, only if requested by syscall */ + if (wait && (wait->_key & POLL_LL)) + sk_poll_ll(sock->sk, 1); + } + + return ll_flag | sock->ops->poll(file, sock, wait); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) -- cgit v1.2.3 From 537f7f8494be4219eb0ef47121ea16a6f9f0f49e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Jun 2013 09:34:36 -0700 Subject: bridge: check for zero ether address in fdb add The check for all-zero ether address was removed from rtnetlink core, since Vxlan uses all-zero ether address to signify default address. Need to add check back in for bridge. Signed-off-by: Stephen Hemminger --- net/bridge/br_fdb.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ebfa4443c69..60aca9109a5 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -707,6 +707,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } } + if (is_zero_ether_addr(addr)) { + pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); + return -EINVAL; + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", -- cgit v1.2.3 From bba54de5bdd107d3841b560f1a9cb0ed06e79533 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 16 Jun 2013 09:09:36 +0300 Subject: ipvs: provide iph to schedulers Before now the schedulers needed access only to IP addresses and it was easy to get them from skb by using ip_vs_fill_iph_addr_only. New changes for the SH scheduler will need the protocol and ports which is difficult to get from skb for the IPv6 case. As we have all the data in the iph structure, to avoid the same slow lookups provide the iph to schedulers. 
Signed-off-by: Julian Anastasov Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 4 ++-- net/netfilter/ipvs/ip_vs_dh.c | 10 ++++------ net/netfilter/ipvs/ip_vs_lblc.c | 12 +++++------- net/netfilter/ipvs/ip_vs_lblcr.c | 12 +++++------- net/netfilter/ipvs/ip_vs_lc.c | 3 ++- net/netfilter/ipvs/ip_vs_nq.c | 3 ++- net/netfilter/ipvs/ip_vs_rr.c | 3 ++- net/netfilter/ipvs/ip_vs_sed.c | 3 ++- net/netfilter/ipvs/ip_vs_sh.c | 10 ++++------ net/netfilter/ipvs/ip_vs_wlc.c | 3 ++- net/netfilter/ipvs/ip_vs_wrr.c | 3 ++- 11 files changed, 32 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 05565d2b3a6..e9b0330f220 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -305,7 +305,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb); + dest = sched->schedule(svc, skb, iph); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -452,7 +452,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb); + dest = sched->schedule(svc, skb, iph); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index ccab120df45..c3b84546ea9 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -214,18 +214,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest) * Destination hashing scheduling */ static struct ip_vs_dest * -ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_dh_state *s; - struct ip_vs_iphdr iph; - - 
ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); s = (struct ip_vs_dh_state *) svc->sched_data; - dest = ip_vs_dh_get(svc->af, s, &iph.daddr); + dest = ip_vs_dh_get(svc->af, s, &iph->daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -235,7 +233,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 44595b8ae37..1383b0eadc0 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -487,19 +487,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) * Locality-Based (weighted) Least-Connection scheduling */ static struct ip_vs_dest * -ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_lblc_table *tbl = svc->sched_data; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest = NULL; struct ip_vs_lblc_entry *en; - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); - IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); + en = ip_vs_lblc_get(svc->af, tbl, &iph->daddr); if (en) { /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -529,12 +527,12 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblc_new(tbl, &iph.daddr, dest); + ip_vs_lblc_new(tbl, &iph->daddr, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP 
address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 876937db0bf..3cd85b2fc67 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -655,19 +655,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) * Locality-Based (weighted) Least-Connection scheduling */ static struct ip_vs_dest * -ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_lblcr_table *tbl = svc->sched_data; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_lblcr_entry *en; - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); - IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); + en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr); if (en) { en->lastuse = jiffies; @@ -718,12 +716,12 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblcr_new(tbl, &iph.daddr, dest); + ip_vs_lblcr_new(tbl, &iph->daddr, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 5128e338a74..2bdcb1cf212 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -26,7 +26,8 @@ * Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_lc_schedule(struct ip_vs_service *svc, 
const struct sk_buff *skb) +ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 646cfd4baa7..d8d9860934f 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -55,7 +55,8 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index c35986c793d..176b87c35e3 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -55,7 +55,8 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) * Round-Robin Scheduling */ static struct ip_vs_dest * -ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct list_head *p; struct ip_vs_dest *dest, *last; diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index f3205925359..a5284cc3d88 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -59,7 +59,8 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; unsigned int loh, doh; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index a65edfe4b16..e0d5d165356 100644 --- 
a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -227,18 +227,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest) * Source Hashing scheduling */ static struct ip_vs_dest * -ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; - struct ip_vs_iphdr iph; - - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); s = (struct ip_vs_sh_state *) svc->sched_data; - dest = ip_vs_sh_get(svc->af, s, &iph.saddr); + dest = ip_vs_sh_get(svc->af, s, &iph->saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -248,7 +246,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.saddr), + IP_VS_DBG_ADDR(svc->af, &iph->saddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index c60a81c4ce9..6dc1fa12884 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -31,7 +31,8 @@ * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; unsigned int loh, doh; diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 0e68555bceb..0546cd572d6 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -162,7 +162,8 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, * Weighted Round-Robin Scheduling */ static struct ip_vs_dest * -ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff 
*skb) +ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *last, *stop = NULL; struct ip_vs_wrr_mark *mark = svc->sched_data; -- cgit v1.2.3 From c6c96c188336b2b95d5f14facd101f1e4165a9d3 Mon Sep 17 00:00:00 2001 From: Alexander Frolkin Date: Thu, 13 Jun 2013 08:56:15 +0100 Subject: ipvs: sloppy TCP and SCTP This adds support for sloppy TCP and SCTP modes to IPVS. When enabled (sysctls net.ipv4.vs.sloppy_tcp and net.ipv4.vs.sloppy_sctp), allows IPVS to create connection state on any packet, not just a TCP SYN (or SCTP INIT). This allows connections to fail over from one IPVS director to another mid-flight. Signed-off-by: Alexander Frolkin Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 14 ++++++++++++++ net/netfilter/ipvs/ip_vs_proto_sctp.c | 18 ++++++++++-------- net/netfilter/ipvs/ip_vs_proto_tcp.c | 14 ++++++++------ 3 files changed, 32 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 47e510819f5..da035fc01eb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1738,6 +1738,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "sloppy_tcp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sloppy_sctp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "expire_quiescent_template", .maxlen = sizeof(int), @@ -3723,6 +3735,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_sync_sock_size; tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; + tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; + tbl[idx++].data = &ipvs->sysctl_sloppy_sctp; tbl[idx++].data = 
&ipvs->sysctl_expire_quiescent_template; ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 86464881cd2..df29d641704 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -15,6 +15,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct net *net; struct ip_vs_service *svc; + struct netns_ipvs *ipvs; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; @@ -27,13 +28,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + ipvs = net_ipvs(net); rcu_read_lock(); - if ((sch->type == SCTP_CID_INIT) && + if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, &iph->daddr, sh->dest))) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. 
* We have to drop this packet :( @@ -232,21 +234,21 @@ static struct ipvs_sctp_nextstate * STATE : IP_VS_SCTP_S_NONE */ /*next state *//*event */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 50a15944c6c..e3a697234a9 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; + struct netns_ipvs *ipvs; th = 
skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) { @@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); + ipvs = net_ipvs(net); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); - if (th->syn && + if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, &iph->daddr, th->dest))) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }}, /* OUTPUT */ @@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; @@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }}, -/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, +/*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, /*rst*/ {{sCL, 
sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, /* OUTPUT */ @@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; -- cgit v1.2.3 From 61e7c420b4b2a797ac209106ba743ab6ebe984d8 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 18 Jun 2013 10:08:07 +0300 Subject: ipvs: replace the SCTP state machine Convert the SCTP state table, so that it is more readable. Change the states to be according to the diagram in RFC 2960 and add more states suitable for a middle box. Still, such a change in states adds incompatibility if systems in a sync setup include this change and others do not include it. With this change we also have proper transitions in INPUT-ONLY mode (DR/TUN) where we see packets only from the client. Now we should not switch to the 10-second CLOSED state at a time when we should stay in ESTABLISHED state. The short names for states are because we have 16-char space in ipvsadm and 11-char limit for the connection list format. This is a consequence of the TCP implementation, where the longest state name is ESTABLISHED.
Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 854 ++++++---------------------------- net/netfilter/ipvs/ip_vs_sync.c | 7 +- 2 files changed, 157 insertions(+), 704 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index df29d641704..3c0da872803 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -185,710 +185,159 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) return 1; } -struct ipvs_sctp_nextstate { - int next_state; -}; enum ipvs_sctp_event_t { - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_SER, - IP_VS_SCTP_EVE_INIT_CLI, - IP_VS_SCTP_EVE_INIT_SER, - IP_VS_SCTP_EVE_INIT_ACK_CLI, - IP_VS_SCTP_EVE_INIT_ACK_SER, - IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, - IP_VS_SCTP_EVE_COOKIE_ECHO_SER, - IP_VS_SCTP_EVE_COOKIE_ACK_CLI, - IP_VS_SCTP_EVE_COOKIE_ACK_SER, - IP_VS_SCTP_EVE_ABORT_CLI, - IP_VS_SCTP_EVE__ABORT_SER, - IP_VS_SCTP_EVE_SHUT_CLI, - IP_VS_SCTP_EVE_SHUT_SER, - IP_VS_SCTP_EVE_SHUT_ACK_CLI, - IP_VS_SCTP_EVE_SHUT_ACK_SER, - IP_VS_SCTP_EVE_SHUT_COM_CLI, - IP_VS_SCTP_EVE_SHUT_COM_SER, - IP_VS_SCTP_EVE_LAST + IP_VS_SCTP_DATA = 0, /* DATA, SACK, HEARTBEATs */ + IP_VS_SCTP_INIT, + IP_VS_SCTP_INIT_ACK, + IP_VS_SCTP_COOKIE_ECHO, + IP_VS_SCTP_COOKIE_ACK, + IP_VS_SCTP_SHUTDOWN, + IP_VS_SCTP_SHUTDOWN_ACK, + IP_VS_SCTP_SHUTDOWN_COMPLETE, + IP_VS_SCTP_ERROR, + IP_VS_SCTP_ABORT, + IP_VS_SCTP_EVENT_LAST }; -static enum ipvs_sctp_event_t sctp_events[256] = { - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_INIT_CLI, - IP_VS_SCTP_EVE_INIT_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_ABORT_CLI, - IP_VS_SCTP_EVE_SHUT_CLI, - IP_VS_SCTP_EVE_SHUT_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, - IP_VS_SCTP_EVE_COOKIE_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_SHUT_COM_CLI, +/* 
RFC 2960, 3.2 Chunk Field Descriptions */ +static __u8 sctp_events[] = { + [SCTP_CID_DATA] = IP_VS_SCTP_DATA, + [SCTP_CID_INIT] = IP_VS_SCTP_INIT, + [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK, + [SCTP_CID_SACK] = IP_VS_SCTP_DATA, + [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA, + [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA, + [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT, + [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN, + [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK, + [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR, + [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO, + [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK, + [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA, + [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA, + [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE, }; -static struct ipvs_sctp_nextstate - sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = { - /* - * STATE : IP_VS_SCTP_S_NONE - */ - /*next state *//*event */ - {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, - 
}, - /* - * STATE : IP_VS_SCTP_S_INIT_CLI - * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT) - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_SER - * Server sent INIT and waiting for INIT ACK from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* 
IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_ACK_CLI - * Client sent INIT ACK and waiting for ECHO from the server - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK has been resent by the client, let us stay is in - * the same state - */ - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK sent by the server, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * ECHO by client, it should not happen, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO by server, this is what we are expecting, move to ECHO_SER - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, it should not happen, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * Unexpected COOKIE ACK from server, staty in the same state - */ - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* 
IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_ACK_SER - * Server sent INIT ACK and waiting for ECHO from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * Unexpected INIT_ACK by the client, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK resent by the server, let us move to same state - */ - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client send the ECHO, this is what we are expecting, - * move to ECHO_CLI - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO received from the server, Not sure what to do, - * let us close it - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, let us stay in the same state - */ - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, hmm... this should not happen, lets close - * the connection. 
- */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ECHO_CLI - * Cient sent ECHO and waiting COOKEI ACK from the Server - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK has been by the client, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. 
Stay in the same state - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client resent the ECHO, let us stay in the same state - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO received from the server, Not sure what to do, - * let us close it - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, this shoud not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, this is what we are awaiting,lets move to - * ESTABLISHED. - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ECHO_SER - * Server sent ECHO and waiting COOKEI ACK from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. 
Stay in the same state - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK has been by the server, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent the ECHO, not sure what to do, let's close the - * connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO resent by the server, stay in the same state - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, this is what we are expecting, let's move - * to ESTABLISHED. - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, this should not happen, lets close the - * connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ESTABLISHED - * Association established - */ - {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). 
- */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_SHUT_CLI - * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server - */ - /* - * We received the data chuck, keep the state unchanged. 
I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. 
So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN resent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this is what we are expecting, let's move - * to SHUDOWN_ACK_SER - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_SHUT_SER - * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). 
- */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN resent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN resent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this is what we are expecting, let's - * move to SHUT_ACK_CLI - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - - /* - * State : IP_VS_SCTP_S_SHUT_ACK_CLI - 
* SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. 
So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN sent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN sent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client resent SHUDTDOWN_ACK, let's stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - /* - * SHUTDOWN COMPLETE from server this is what we are expecting. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - - /* - * State : IP_VS_SCTP_S_SHUT_ACK_SER - * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ +/* SCTP States: + * See RFC 2960, 4. SCTP Association State Diagram + * + * New states (not in diagram): + * - INIT1 state: use shorter timeout for dropped INIT packets + * - REJECTED state: use shorter timeout if INIT is rejected with ABORT + * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging + * + * The states are as seen in real server. 
In the diagram, INIT1, INIT, + * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state. + * + * States as per packets from client (C) and server (S): + * + * Setup of client connection: + * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK + * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK + * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO + * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK + * + * Setup of server connection: + * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK + * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO + * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK + */ - {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. 
So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN sent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN sent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen let's close - * the connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server resent SHUTDOWN ACK, stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this what we are expecting, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - /* - * SHUTDOWN COMPLETE from server this should not happen. 
- */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_CLOSED - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - } +#define sNO IP_VS_SCTP_S_NONE +#define sI1 IP_VS_SCTP_S_INIT1 +#define sIN IP_VS_SCTP_S_INIT +#define sCS IP_VS_SCTP_S_COOKIE_SENT +#define sCR IP_VS_SCTP_S_COOKIE_REPLIED +#define sCW IP_VS_SCTP_S_COOKIE_WAIT +#define sCO IP_VS_SCTP_S_COOKIE +#define sCE IP_VS_SCTP_S_COOKIE_ECHOED +#define sES IP_VS_SCTP_S_ESTABLISHED +#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT +#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED +#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT +#define sRJ IP_VS_SCTP_S_REJECTED +#define sCL IP_VS_SCTP_S_CLOSED + +static const __u8 sctp_states + [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = { + { /* INPUT */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sI1, sIN, sIN, sCS, sCR, 
sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, +/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL}, +/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL}, +/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, + { /* OUTPUT */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW}, +/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL}, +/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, + { /* INPUT-ONLY */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, +/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sES, sES, sES, sES, sES, sES, sCO, 
sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL}, +/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL}, +/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, }; -/* - * Timeout table[state] - */ +#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ) + +/* Timeout table[state] */ static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { - [IP_VS_SCTP_S_NONE] = 2 * HZ, - [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_CLOSED] = 10 * HZ, - [IP_VS_SCTP_S_LAST] = 2 * HZ, + [IP_VS_SCTP_S_NONE] = 2 * HZ, + [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ, + [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, + [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ, + [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_LAST] = 2 * HZ, }; static const char 
*sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { - [IP_VS_SCTP_S_NONE] = "NONE", - [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI", - [IP_VS_SCTP_S_INIT_SER] = "INIT_SER", - [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI", - [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER", - [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI", - [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER", - [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED", - [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI", - [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER", - [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI", - [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER", - [IP_VS_SCTP_S_CLOSED] = "CLOSED", - [IP_VS_SCTP_S_LAST] = "BUG!" + [IP_VS_SCTP_S_NONE] = "NONE", + [IP_VS_SCTP_S_INIT1] = "INIT1", + [IP_VS_SCTP_S_INIT] = "INIT", + [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT", + [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED", + [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT", + [IP_VS_SCTP_S_COOKIE] = "COOKIE", + [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED", + [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED", + [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT", + [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED", + [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT", + [IP_VS_SCTP_S_REJECTED] = "REJECTED", + [IP_VS_SCTP_S_CLOSED] = "CLOSED", + [IP_VS_SCTP_S_LAST] = "BUG!", }; @@ -945,17 +394,20 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, } } - event = sctp_events[chunk_type]; + event = (chunk_type < sizeof(sctp_events)) ? 
+ sctp_events[chunk_type] : IP_VS_SCTP_DATA; - /* - * If the direction is IP_VS_DIR_OUTPUT, this event is from server - */ - if (direction == IP_VS_DIR_OUTPUT) - event++; - /* - * get next state + /* Update direction to INPUT_ONLY if necessary + * or delete NO_OUTPUT flag if output packet detected */ - next_state = sctp_states_table[cp->state][event].next_state; + if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { + if (direction == IP_VS_DIR_OUTPUT) + cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; + else + direction = IP_VS_DIR_INPUT_ONLY; + } + + next_state = sctp_states[direction][event][cp->state]; if (next_state != cp->state) { struct ip_vs_dest *dest = cp->dest; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f6046d9af8d..2fc66394d86 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -461,9 +461,10 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { if (!((1 << cp->state) & ((1 << IP_VS_SCTP_S_ESTABLISHED) | - (1 << IP_VS_SCTP_S_CLOSED) | - (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) | - (1 << IP_VS_SCTP_S_SHUT_ACK_SER)))) + (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | + (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | + (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | + (1 << IP_VS_SCTP_S_CLOSED)))) return 0; force = cp->state != cp->old_state; if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) -- cgit v1.2.3 From acaac5d8bbedf6bd96f53960780942e1ad90d70e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 18 Jun 2013 10:08:08 +0300 Subject: ipvs: drop SCTP connections depending on state Drop SCTP connections under load (dropentry context) depending on the protocol state, just like for TCP: INIT conns are dropped immediately, established are dropped randomly while connections in progress or shutdown are skipped. 
Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c8c52a98590..4c8e5c0aa1a 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1231,6 +1231,18 @@ void ip_vs_random_dropentry(struct net *net) default: continue; } + } else if (cp->protocol == IPPROTO_SCTP) { + switch (cp->state) { + case IP_VS_SCTP_S_INIT1: + case IP_VS_SCTP_S_INIT: + break; + case IP_VS_SCTP_S_ESTABLISHED: + if (todrop_entry(cp)) + break; + continue; + default: + continue; + } } else { if (!todrop_entry(cp)) continue; -- cgit v1.2.3 From eba3b5a78799d21dea05118b294524958f0ab592 Mon Sep 17 00:00:00 2001 From: Alexander Frolkin Date: Wed, 19 Jun 2013 10:54:25 +0100 Subject: ipvs: SH fallback and L4 hashing By default the SH scheduler rejects connections that are hashed onto a realserver of weight 0. This patch adds a flag to make SH choose a different realserver in this case, instead of rejecting the connection. The patch also adds a flag to make SH include the source port (TCP, UDP, SCTP) in the hash as well as the source address. This basically allows for deterministic round-robin load balancing (i.e., where any director in a cluster of directors with identical config will send the same packet the same way). The flags are service flags (IP_VS_SVC_F_SCHED*) so that these options can be set per service. They are set using a new option to ipvsadm. 
Signed-off-by: Alexander Frolkin Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sh.c | 100 +++++++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index e0d5d165356..f16c027df15 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -48,6 +48,10 @@ #include +#include +#include +#include + /* * IPVS SH bucket @@ -71,10 +75,19 @@ struct ip_vs_sh_state { struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; +/* Helper function to determine if server is unavailable */ +static inline bool is_unavailable(struct ip_vs_dest *dest) +{ + return atomic_read(&dest->weight) <= 0 || + dest->flags & IP_VS_DEST_F_OVERLOAD; +} + /* * Returns hash value for IPVS SH entry */ -static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) +static inline unsigned int +ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr, + __be16 port, unsigned int offset) { __be32 addr_fold = addr->ip; @@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif - return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK; + return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) & + IP_VS_SH_TAB_MASK; } @@ -91,12 +105,42 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. 
*/ static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) +ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) { - return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); + unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest); + + return (!dest || is_unavailable(dest)) ? NULL : dest; } +/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +static inline struct ip_vs_dest * +ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) +{ + unsigned int offset; + unsigned int hash; + struct ip_vs_dest *dest; + + for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { + hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + dest = rcu_dereference(s->buckets[hash].dest); + if (!dest) + break; + if (is_unavailable(dest)) + IP_VS_DBG_BUF(6, "SH: selected unavailable server " + "%s:%d (offset %d)", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), offset); + else + return dest; + } + + return NULL; +} + /* * Assign all the hash buckets of the specified table with the service. */ @@ -213,13 +257,33 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, } -/* - * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, - * consider that the server is overloaded here. 
- */ -static inline int is_overloaded(struct ip_vs_dest *dest) +/* Helper function to get port number */ +static inline __be16 +ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - return dest->flags & IP_VS_DEST_F_OVERLOAD; + __be16 port; + struct tcphdr _tcph, *th; + struct udphdr _udph, *uh; + sctp_sctphdr_t _sctph, *sh; + + switch (iph->protocol) { + case IPPROTO_TCP: + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + port = th->source; + break; + case IPPROTO_UDP: + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + port = uh->source; + break; + case IPPROTO_SCTP: + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + port = sh->source; + break; + default: + port = 0; + } + + return port; } @@ -232,15 +296,21 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; + __be16 port = 0; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); + if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) + port = ip_vs_sh_get_port(skb, iph); + s = (struct ip_vs_sh_state *) svc->sched_data; - dest = ip_vs_sh_get(svc->af, s, &iph->saddr); - if (!dest - || !(dest->flags & IP_VS_DEST_F_AVAILABLE) - || atomic_read(&dest->weight) <= 0 - || is_overloaded(dest)) { + + if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) + dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + else + dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + + if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } -- cgit v1.2.3 From 4d0c875dcc4923476f364e83912d134da2df224c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 24 Jun 2013 22:44:41 +0300 Subject: ipvs: add sync_persist_mode flag Add sync_persist_mode flag to reduce sync traffic by syncing only persistent templates. 
Signed-off-by: Julian Anastasov Tested-by: Aleksey Chudov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++++++ net/netfilter/ipvs/ip_vs_sync.c | 12 ++++++++++++ 2 files changed, 19 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index da035fc01eb..c8148e48738 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1714,6 +1714,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = &proc_do_sync_ports, }, + { + .procname = "sync_persist_mode", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "sync_qlen_max", .maxlen = sizeof(unsigned long), @@ -3729,6 +3735,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_sync_ver; ipvs->sysctl_sync_ports = 1; tbl[idx++].data = &ipvs->sysctl_sync_ports; + tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; ipvs->sysctl_sync_sock_size = 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 2fc66394d86..f4484719f3e 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -425,6 +425,16 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) return sb; } +/* Check if connection is controlled by persistence */ +static inline bool in_persistence(struct ip_vs_conn *cp) +{ + for (cp = cp->control; cp; cp = cp->control) { + if (cp->flags & IP_VS_CONN_F_TEMPLATE) + return true; + } + return false; +} + /* Check if conn should be synced. * pkts: conn packets, use sysctl_sync_threshold to avoid packet check * - (1) sync_refresh_period: reduce sync rate. 
Additionally, retry @@ -447,6 +457,8 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, /* Check if we sync in current state */ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) force = 0; + else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) + return 0; else if (likely(cp->protocol == IPPROTO_TCP)) { if (!((1 << cp->state) & ((1 << IP_VS_TCP_S_ESTABLISHED) | -- cgit v1.2.3 From 963b89e80d9fb7f22fc2688428e121b410b76504 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Jun 2013 17:40:33 +0200 Subject: sit: fix 4in4 + IPsec scenario Since commit 32b8a8e59c9c "sit: add IPv4 over IPv4 support", tunnel->parms.iph.protocol is 0 when both 4in4 and 6in4 are set up, but xfrm_lookup() is called only when proto is != 0, thus we need to pass the real value. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index bd227e5ea9d..3b00d81c8f1 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -542,7 +542,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } rt = ip_route_output_tunnel(dev_net(dev), &fl4, - tunnel->parms.iph.protocol, + protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), -- cgit v1.2.3 From 77ecaace6c5487eae8ede633ad51478511a8e125 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 26 Jun 2013 03:41:49 +0200 Subject: ipv6: rearm router solicitaion timer when setting new tokenized address When a new tokenized address gets installed we send out just one router solicitation. We should send out `rtr_solicits' in case one router advertisement got lost. So, rearm the timer as we do in addrconf_dad_complete. Cc: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index afaf3cdadf5..4e4cc1fc26d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4361,8 +4361,11 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) write_lock_bh(&idev->lock); - if (update_rs) + if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_probes = 1; + addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + } /* Well, that's kinda nasty ... */ list_for_each_entry(ifp, &idev->addr_list, if_list) { -- cgit v1.2.3 From 621e84d6f373dcb273ebfd772638b8e7dc3c2c48 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Jun 2013 16:11:27 +0200 Subject: dev: introduce skb_scrub_packet() The goal of this new function is to perform all needed cleanup before sending an skb into another netns. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/dev.c | 11 +---------- net/core/skbuff.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 722f633926e..370354a9c5f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1652,22 +1652,13 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } } - skb_orphan(skb); - if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); return NET_RX_DROP; } - skb->skb_iif = 0; - skb_dst_drop(skb); - skb->tstamp.tv64 = 0; - skb->pkt_type = PACKET_HOST; + skb_scrub_packet(skb); skb->protocol = eth_type_trans(skb, dev); - skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); - nf_reset_trace(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f73eca29fb..b1fcb8727e5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3492,3 +3492,26 @@ bool skb_try_coalesce(struct sk_buff *to, 
struct sk_buff *from, return true; } EXPORT_SYMBOL(skb_try_coalesce); + +/** + * skb_scrub_packet - scrub an skb before sending it to another netns + * + * @skb: buffer to clean + * + * skb_scrub_packet can be used to clean an skb before injecting it in + * another namespace. We have to clear all information in the skb that + * could impact namespace isolation. + */ +void skb_scrub_packet(struct sk_buff *skb) +{ + skb_orphan(skb); + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->skb_iif = 0; + skb_dst_drop(skb); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); + nf_reset_trace(skb); +} +EXPORT_SYMBOL_GPL(skb_scrub_packet); -- cgit v1.2.3 From 5e6700b3bf98fe98d630bf9c939ad4c85ce95592 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Jun 2013 16:11:28 +0200 Subject: sit: add support of x-netns This patch allows to switch the netns when packet is encapsulated or decapsulated. In other word, the encapsulated packet is received in a netns, where the lookup is done to find the tunnel. Once the tunnel is found, the packet is decapsulated and injecting into the corresponding interface which stands to another netns. When one of the two netns is removed, the tunnel is destroyed. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel.c | 10 +++++++++- net/ipv6/sit.c | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 3b00d81c8f1..394cebc96d2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -304,6 +304,7 @@ static struct net_device *__ip_tunnel_create(struct net *net, tunnel = netdev_priv(dev); tunnel->parms = *parms; + tunnel->net = net; err = register_netdevice(dev); if (err) @@ -453,6 +454,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + if (tunnel->net != dev_net(tunnel->dev)) + skb_scrub_packet(skb); + if (tunnel->dev->type == ARPHRD_ETHER) { skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -541,7 +545,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } - rt = ip_route_output_tunnel(dev_net(dev), &fl4, + rt = ip_route_output_tunnel(tunnel->net, &fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, @@ -602,6 +606,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } #endif + if (tunnel->net != dev_net(dev)) + skb_scrub_packet(skb); + if (tunnel->err_count > 0) { if (time_before(jiffies, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { @@ -888,6 +895,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], if (ip_tunnel_find(itn, p, dev->type)) return -EEXIST; + nt->net = net; nt->parms = *p; err = register_netdevice(dev); if (err) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f639866b3dc..97a0bfe2c29 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -466,14 +466,14 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) static void ipip6_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct sit_net *sitn = 
net_generic(net, sit_net_id); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct sit_net *sitn = net_generic(tunnel->net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { - ipip6_tunnel_unlink(sitn, netdev_priv(dev)); - ipip6_tunnel_del_prl(netdev_priv(dev), NULL); + ipip6_tunnel_unlink(sitn, tunnel); + ipip6_tunnel_del_prl(tunnel, NULL); } dev_put(dev); } @@ -621,6 +621,8 @@ static int ipip6_rcv(struct sk_buff *skb) tstats->rx_packets++; tstats->rx_bytes += skb->len; + if (tunnel->net != dev_net(tunnel->dev)) + skb_scrub_packet(skb); netif_rx(skb); return 0; @@ -803,7 +805,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + rt = ip_route_output_ports(tunnel->net, &fl4, NULL, dst, tiph->saddr, 0, 0, IPPROTO_IPV6, RT_TOS(tos), @@ -858,6 +860,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tunnel->err_count = 0; } + if (tunnel->net != dev_net(dev)) + skb_scrub_packet(skb); + /* * Okay, now see if we can stuff it in the buffer as-is. 
*/ @@ -944,7 +949,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4, + NULL, iph->daddr, iph->saddr, 0, 0, IPPROTO_IPV6, @@ -959,7 +965,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -972,7 +978,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) { - struct net *net = dev_net(t->dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); ipip6_tunnel_unlink(sitn, t); @@ -1248,7 +1254,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; } @@ -1257,6 +1262,7 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; + tunnel->net = dev_net(dev); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1277,6 +1283,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); iph->version = 4; @@ -1564,8 +1571,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { + struct net *net = dev_net(sitn->fb_tunnel_dev); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if 
(dev->rtnl_link_ops == &sit_link_ops) + unregister_netdevice_queue(dev, head); + for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1573,7 +1586,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea t = rtnl_dereference(sitn->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (dev_net(t->dev) != net) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1598,6 +1616,10 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); if (err) -- cgit v1.2.3 From 3a36515f729458c8efa0c124c7262d5843ad5c37 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Fri, 28 Jun 2013 03:04:23 +0200 Subject: netlink: fix splat in skb_clone with large messages Since (c05cdb1 netlink: allow large data transfers from user-space), netlink splats if it invokes skb_clone on large netlink skbs since: * skb_shared_info was not correctly initialized. * skb->destructor is not set in the cloned skb. This was spotted by trinity: [ 894.990671] BUG: unable to handle kernel paging request at ffffc9000047b001 [ 894.991034] IP: [] skb_clone+0x24/0xc0 [...] [ 894.991034] Call Trace: [ 894.991034] [] nl_fib_input+0x6a/0x240 [ 894.991034] [] ? _raw_read_unlock+0x26/0x40 [ 894.991034] [] netlink_unicast+0x169/0x1e0 [ 894.991034] [] netlink_sendmsg+0x251/0x3d0 Fix it by: 1) introducing a new netlink_skb_clone function that is used in nl_fib_input, that sets our special skb->destructor in the cloned skb. Moreover, handle the release of the large cloned skb head area in the destructor path. 
2) not allowing large skbuffs in the netlink broadcast path. I cannot find any reasonable use of the large data transfer using netlink in that path, moreover this helps to skip extra skb_clone handling. I found two more netlink clients that are cloning the skbs, but they are not in the sendmsg path. Therefore, the sole client cloning that I found seems to be the fib frontend. Thanks to Eric Dumazet for helping to address this issue. Reported-by: Fengguang Wu Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- net/netlink/af_netlink.c | 35 ++++++++++++++++++----------------- 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 05a4888dede..b3f627ac4ed 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -961,7 +961,7 @@ static void nl_fib_input(struct sk_buff *skb) nlmsg_len(nlh) < sizeof(*frn)) return; - skb = skb_clone(skb, GFP_KERNEL); + skb = netlink_skb_clone(skb, GFP_KERNEL); if (skb == NULL) return; nlh = nlmsg_hdr(skb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6967fbcca6c..0c61b59175d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -849,7 +849,10 @@ static void netlink_skb_destructor(struct sk_buff *skb) } #endif if (is_vmalloc_addr(skb->head)) { - vfree(skb->head); + if (!skb->cloned || + !atomic_dec_return(&(skb_shinfo(skb)->dataref))) + vfree(skb->head); + skb->head = NULL; } if (skb->sk != NULL) @@ -1532,33 +1535,31 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } -static struct sk_buff *netlink_alloc_large_skb(unsigned int size) +static struct sk_buff *netlink_alloc_large_skb(unsigned int size, + int broadcast) { struct sk_buff *skb; void *data; - if (size <= NLMSG_GOODSIZE) + if (size <= NLMSG_GOODSIZE || broadcast) return alloc_skb(size, GFP_KERNEL); - skb = alloc_skb_head(GFP_KERNEL); - if (skb == NULL) - return NULL; + size = 
SKB_DATA_ALIGN(size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); data = vmalloc(size); if (data == NULL) - goto err; + return NULL; - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->len = 0; - skb->destructor = netlink_skb_destructor; + skb = build_skb(data, size); + if (skb == NULL) + vfree(data); + else { + skb->head_frag = 0; + skb->destructor = netlink_skb_destructor; + } return skb; -err: - kfree_skb(skb); - return NULL; } /* @@ -2244,7 +2245,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = netlink_alloc_large_skb(len); + skb = netlink_alloc_large_skb(len, dst_group); if (skb == NULL) goto out; -- cgit v1.2.3 From 384816051ca9125cd54750e59c780c2a2655fa4f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 24 Jun 2013 11:52:38 +0400 Subject: SUNRPC: fix races on PipeFS MOUNT notifications Below are races, when RPC client can be created without PipeFS dentries CPU#0 CPU#1 ----------------------------- ----------------------------- rpc_new_client rpc_fill_super rpc_setup_pipedir mutex_lock(&sn->pipefs_sb_lock) rpc_get_sb_net == NULL (no per-net PipeFS superblock) sn->pipefs_sb = sb; notifier_call_chain(MOUNT) (client is not in the list) rpc_register_client (client without pipes dentries) To fix this, this patch: 1) makes PipeFS mount notification call with pipefs_sb_lock being held. 2) releases pipefs_sb_lock on new SUNRPC client creation only after registration.
Signed-off-by: Stanislav Kinsbursky Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 26 +++++++++++++++----------- net/sunrpc/rpc_pipe.c | 3 +++ 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a750b9c364..b827a4b9197 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -157,20 +157,15 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, } static int -rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name) +rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name, + struct super_block *pipefs_sb) { - struct net *net = rpc_net_ns(clnt); - struct super_block *pipefs_sb; struct dentry *dentry; clnt->cl_dentry = NULL; if (dir_name == NULL) return 0; - pipefs_sb = rpc_get_sb_net(net); - if (!pipefs_sb) - return 0; dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); - rpc_put_sb_net(net); if (IS_ERR(dentry)) return PTR_ERR(dentry); clnt->cl_dentry = dentry; @@ -296,6 +291,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; + struct super_block *pipefs_sb; /* sanity check the name before trying to print it */ dprintk("RPC: creating %s client for %s (xprt %p)\n", @@ -354,9 +350,12 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru atomic_set(&clnt->cl_count, 1); - err = rpc_setup_pipedir(clnt, program->pipe_dir_name); - if (err < 0) - goto out_no_path; + pipefs_sb = rpc_get_sb_net(rpc_net_ns(clnt)); + if (pipefs_sb) { + err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); + if (err) + goto out_no_path; + } auth = rpcauth_create(args->authflavor, clnt); if (IS_ERR(auth)) { @@ -369,11 +368,16 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru /* save the nodename */ rpc_clnt_set_nodename(clnt, utsname()->nodename); rpc_register_client(clnt); + if 
(pipefs_sb) + rpc_put_sb_net(rpc_net_ns(clnt)); return clnt; out_no_auth: - rpc_clnt_remove_pipedir(clnt); + if (pipefs_sb) + __rpc_clnt_remove_pipedir(clnt); out_no_path: + if (pipefs_sb) + rpc_put_sb_net(rpc_net_ns(clnt)); kfree(clnt->cl_principal); out_no_principal: rpc_free_iostats(clnt->cl_metrics); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a816b3a6905..e02823bdfe9 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1127,6 +1127,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net, NET_NAME(net)); + mutex_lock(&sn->pipefs_sb_lock); sn->pipefs_sb = sb; err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_MOUNT, @@ -1134,6 +1135,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_depopulate; sb->s_fs_info = get_net(net); + mutex_unlock(&sn->pipefs_sb_lock); return 0; err_depopulate: @@ -1142,6 +1144,7 @@ err_depopulate: sb); sn->pipefs_sb = NULL; __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); + mutex_unlock(&sn->pipefs_sb_lock); return err; } -- cgit v1.2.3 From adb6fa7ffe9031857ec14b8aab75c9ab65556cbc Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 26 Jun 2013 10:15:14 +0400 Subject: SUNRPC: fix races on PipeFS UMOUNT notifications CPU#0 CPU#1 ----------------------------- ----------------------------- rpc_kill_sb sn->pipefs_sb = NULL rpc_release_client (UMOUNT_EVENT) rpc_free_auth rpc_pipefs_event rpc_get_client_for_event !atomic_inc_not_zero(cl_count) atomic_inc(cl_count) rpc_free_client rpc_clnt_remove_pipedir To fix this, this patch does the following: 1) Calls RPC_PIPEFS_UMOUNT notification with sn->pipefs_sb_lock being held. 2) Removes SUNRPC client from the list AFTER pipes destroying. 3) Doesn't hold RPC client on notification: if client in the list, then it can't be destroyed while sn->pipefs_sb_lock in hold by notification caller. 
Signed-off-by: Stanislav Kinsbursky Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 +---- net/sunrpc/rpc_pipe.c | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b827a4b9197..41f180c5a49 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -236,8 +236,6 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event) continue; if (rpc_clnt_skip_event(clnt, event)) continue; - if (atomic_inc_not_zero(&clnt->cl_count) == 0) - continue; spin_unlock(&sn->rpc_client_lock); return clnt; } @@ -254,7 +252,6 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) { error = __rpc_pipefs_event(clnt, event, sb); - rpc_release_client(clnt); if (error) break; } @@ -641,8 +638,8 @@ rpc_free_client(struct rpc_clnt *clnt) rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) rpc_release_client(clnt->cl_parent); - rpc_unregister_client(clnt); rpc_clnt_remove_pipedir(clnt); + rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); kfree(clnt->cl_principal); clnt->cl_metrics = NULL; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e02823bdfe9..4679df5a6d5 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1166,12 +1166,12 @@ static void rpc_kill_sb(struct super_block *sb) goto out; } sn->pipefs_sb = NULL; - mutex_unlock(&sn->pipefs_sb_lock); dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); + mutex_unlock(&sn->pipefs_sb_lock); put_net(net); out: kill_litter_super(sb); -- cgit v1.2.3 From e73f4cc051199799aee4320f300f28ffb82f3eb1 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 24 Jun 2013 11:52:52 +0400 Subject: SUNRPC: split client creation routine into setup and registration 
This helper moves all "registration" code to the new rpc_client_register() helper. This helper will be used later in the series to synchronize against PipeFS MOUNT/UMOUNT events. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 64 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 41f180c5a49..b4f17117b77 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -281,14 +281,47 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); } +static int rpc_client_register(const struct rpc_create_args *args, + struct rpc_clnt *clnt) +{ + const struct rpc_program *program = args->program; + struct rpc_auth *auth; + struct net *net = rpc_net_ns(clnt); + struct super_block *pipefs_sb; + int err = 0; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); + if (err) + goto out; + } + + auth = rpcauth_create(args->authflavor, clnt); + if (IS_ERR(auth)) { + dprintk("RPC: Couldn't create auth handle (flavor %u)\n", + args->authflavor); + err = PTR_ERR(auth); + goto err_auth; + } + + rpc_register_client(clnt); +out: + if (pipefs_sb) + rpc_put_sb_net(net); + return err; + +err_auth: + __rpc_clnt_remove_pipedir(clnt); + goto out; +} + static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { const struct rpc_program *program = args->program; const struct rpc_version *version; struct rpc_clnt *clnt = NULL; - struct rpc_auth *auth; int err; - struct super_block *pipefs_sb; /* sanity check the name before trying to print it */ dprintk("RPC: creating %s client for %s (xprt %p)\n", @@ -347,34 +380,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru atomic_set(&clnt->cl_count, 1); - pipefs_sb = 
rpc_get_sb_net(rpc_net_ns(clnt)); - if (pipefs_sb) { - err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); - if (err) - goto out_no_path; - } - - auth = rpcauth_create(args->authflavor, clnt); - if (IS_ERR(auth)) { - dprintk("RPC: Couldn't create auth handle (flavor %u)\n", - args->authflavor); - err = PTR_ERR(auth); - goto out_no_auth; - } - /* save the nodename */ rpc_clnt_set_nodename(clnt, utsname()->nodename); - rpc_register_client(clnt); - if (pipefs_sb) - rpc_put_sb_net(rpc_net_ns(clnt)); + + err = rpc_client_register(args, clnt); + if (err) + goto out_no_path; return clnt; -out_no_auth: - if (pipefs_sb) - __rpc_clnt_remove_pipedir(clnt); out_no_path: - if (pipefs_sb) - rpc_put_sb_net(rpc_net_ns(clnt)); kfree(clnt->cl_principal); out_no_principal: rpc_free_iostats(clnt->cl_metrics); -- cgit v1.2.3 From 4f6bb246f69443549fbbd0f2abaf863243cb35e9 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 24 Jun 2013 11:52:59 +0400 Subject: SUNRPC: PipeFS MOUNT notification optimization for dying clients There is no need to create pipes for dying clients, so just skip them. Note: we can safely dereference the client structure, because the notification caller is holding sn->pipefs_sb_lock.
Signed-off-by: Stanislav Kinsbursky Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b4f17117b77..f0339ae9bf3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -177,6 +177,8 @@ static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry)) return 1; + if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0) + return 1; return 0; } -- cgit v1.2.3 From 1ec047eb4751e331bc61cff0e98f0db67db8b8dc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Jun 2013 00:06:56 +0200 Subject: ipv6: introduce per-interface counter for dad-completed ipv6 addresses To reduce the number of unnecessary router solicitations, MLDv2 and IGMPv3 messages we need to track the number of valid (as in non-optimistic, no-dad-failed and non-tentative) link-local addresses. Therefore, this patch implements a valid_ll_addr_cnt in struct inet6_dev. We now only emit router solicitations if the first link-local address finishes duplicate address detection. The changes for MLDv2 and IGMPv3 are in a follow-up patch. While there, also simplify one if statement(one minor nit I made in one of my previous patches): if (!...) do(); else return; <> if (...) return; do(); Cc: Flavio Leitner Cc: YOSHIFUJI Hideaki Cc: David Stevens Suggested-by: David Stevens Signed-off-by: Hannes Frederic Sowa Acked-by: Flavio Leitner Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4e4cc1fc26d..20d92ff2d69 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3277,6 +3277,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; + bool send_rs; addrconf_del_dad_timer(ifp); @@ -3290,20 +3291,25 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) router advertisements, start sending router solicitations. */ - if (ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && - (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + read_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + send_rs = ipv6_accept_ra(ifp->idev) && + ifp->idev->cnf.rtr_solicits > 0 && + (dev->flags&IFF_LOOPBACK) == 0 && + ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && + ifp->idev->valid_ll_addr_cnt == 1; + spin_unlock(&ifp->lock); + read_unlock_bh(&ifp->idev->lock); + + if (send_rs) { /* * If a host as already performed a random delay * [...] as part of DAD [...] 
there is no need * to delay again before sending the first RS */ - if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) - ndisc_send_rs(dev, &lladdr, - &in6addr_linklocal_allrouters); - else + if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) return; + ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters); write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); @@ -4576,6 +4582,19 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); } +static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count) +{ + write_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC| + IFA_F_DADFAILED)) == IFA_F_PERMANENT) && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) + ifp->idev->valid_ll_addr_cnt += count; + WARN_ON(ifp->idev->valid_ll_addr_cnt < 0); + spin_unlock(&ifp->lock); + write_unlock_bh(&ifp->idev->lock); +} + static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); @@ -4584,6 +4603,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: + update_valid_ll_addr_cnt(ifp, 1); + /* * If the address was optimistic * we inserted the route at the start of @@ -4599,6 +4620,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ifp->idev->dev, 0, 0); break; case RTM_DELADDR: + update_valid_ll_addr_cnt(ifp, -1); + if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); -- cgit v1.2.3 From b173ee488dcc545e77ed482158a2f0d06d7a5860 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Jun 2013 00:07:01 +0200 Subject: ipv6: resend MLD report if a link-local address completes DAD RFC3590/RFC3810 specifies we should resend MLD reports as soon as a valid link-local address is available. We now use the valid_ll_addr_cnt to check if it is necessary to resend a new report. 
Changes since Flavio Leitner's version: a) adapt for valid_ll_addr_cnt b) resend first reports directly in the path and just arm the timer for mc_qrv-1 resends. Reported-by: Flavio Leitner Cc: Hideaki YOSHIFUJI Cc: David Stevens Signed-off-by: Hannes Frederic Sowa Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 17 ++++++++++++----- net/ipv6/mcast.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 20d92ff2d69..12dd2fec045 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3277,7 +3277,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { struct net_device *dev = ifp->idev->dev; struct in6_addr lladdr; - bool send_rs; + bool send_rs, send_mld; addrconf_del_dad_timer(ifp); @@ -3293,14 +3293,21 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) read_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); - send_rs = ipv6_accept_ra(ifp->idev) && + send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && + ifp->idev->valid_ll_addr_cnt == 1; + send_rs = send_mld && + ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && - ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && - ifp->idev->valid_ll_addr_cnt == 1; + (dev->flags&IFF_LOOPBACK) == 0; spin_unlock(&ifp->lock); read_unlock_bh(&ifp->idev->lock); + /* While dad is in progress mld report's source address is in6_addrany. + * Resend with proper ll now. 
+ */ + if (send_mld) + ipv6_mc_dad_complete(ifp->idev); + if (send_rs) { /* * If a host as already performed a random delay diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 72c8bfe06bb..502c877cbf1 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -999,6 +999,14 @@ static void mld_ifc_start_timer(struct inet6_dev *idev, int delay) in6_dev_hold(idev); } +static void mld_dad_start_timer(struct inet6_dev *idev, int delay) +{ + int tv = net_random() % delay; + + if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) + in6_dev_hold(idev); +} + /* * IGMP handling (alias multicast ICMPv6 messages) */ @@ -1815,6 +1823,46 @@ err_out: goto out; } +static void mld_resend_report(struct inet6_dev *idev) +{ + if (MLD_V1_SEEN(idev)) { + struct ifmcaddr6 *mcaddr; + read_lock_bh(&idev->lock); + for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) { + if (!(mcaddr->mca_flags & MAF_NOREPORT)) + igmp6_send(&mcaddr->mca_addr, idev->dev, + ICMPV6_MGM_REPORT); + } + read_unlock_bh(&idev->lock); + } else { + mld_send_report(idev, NULL); + } +} + +void ipv6_mc_dad_complete(struct inet6_dev *idev) +{ + idev->mc_dad_count = idev->mc_qrv; + if (idev->mc_dad_count) { + mld_resend_report(idev); + idev->mc_dad_count--; + if (idev->mc_dad_count) + mld_dad_start_timer(idev, idev->mc_maxdelay); + } +} + +static void mld_dad_timer_expire(unsigned long data) +{ + struct inet6_dev *idev = (struct inet6_dev *)data; + + mld_resend_report(idev); + if (idev->mc_dad_count) { + idev->mc_dad_count--; + if (idev->mc_dad_count) + mld_dad_start_timer(idev, idev->mc_maxdelay); + } + __in6_dev_put(idev); +} + static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) { @@ -2232,6 +2280,8 @@ void ipv6_mc_down(struct inet6_dev *idev) idev->mc_gq_running = 0; if (del_timer(&idev->mc_gq_timer)) __in6_dev_put(idev); + if (del_timer(&idev->mc_dad_timer)) + __in6_dev_put(idev); for (i = idev->mc_list; i; i=i->next) igmp6_group_dropped(i); @@ -2268,6 +2318,8 @@ void 
ipv6_mc_init_dev(struct inet6_dev *idev) idev->mc_ifc_count = 0; setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, (unsigned long)idev); + setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire, + (unsigned long)idev); idev->mc_qrv = MLD_QRV_DEFAULT; idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; idev->mc_v1_seen = 0; -- cgit v1.2.3 From 2ffae99d1fac272952b5a395759823717760ce37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Thu, 27 Jun 2013 10:27:05 +0300 Subject: ipv4: use next hop exceptions also for input routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d2d68ba9 (ipv4: Cache input routes in fib_info nexthops) assumed that "locally destined, and routed packets, never trigger PMTU events or redirects that will be processed by us". However, it seems that tunnel devices do trigger PMTU events in certain cases. At least ip_gre, ip6_gre, sit, and ipip do use the inner flow's skb_dst(skb)->ops->update_pmtu to propagate mtu information from the outer flows. These can cause the inner flow mtu to be decreased. If next hop exceptions are not consulted for pmtu, IP fragmentation will not be done properly for these routes. It also seems that we really need to have the PMTU information always for netfilter TCPMSS clamp-to-pmtu feature to work properly. So for the time being, cache separate copies of input routes for each next hop exception. Signed-off-by: Timo Teräs Reviewed-by: Julian Anastasov Signed-off-by: David S.
Miller --- net/ipv4/fib_semantics.c | 3 ++- net/ipv4/route.c | 65 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8f6cb7a87cd..d5dbca5ecf6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -169,7 +169,8 @@ static void free_nh_exceptions(struct fib_nh *nh) next = rcu_dereference_protected(fnhe->fnhe_next, 1); - rt_fibinfo_free(&fnhe->fnhe_rth); + rt_fibinfo_free(&fnhe->fnhe_rth_input); + rt_fibinfo_free(&fnhe->fnhe_rth_output); kfree(fnhe); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f3fa42eac46..a9a54a23683 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -565,10 +565,25 @@ static inline void rt_free(struct rtable *rt) static DEFINE_SPINLOCK(fnhe_lock); +static void fnhe_flush_routes(struct fib_nh_exception *fnhe) +{ + struct rtable *rt; + + rt = rcu_dereference(fnhe->fnhe_rth_input); + if (rt) { + RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); + rt_free(rt); + } + rt = rcu_dereference(fnhe->fnhe_rth_output); + if (rt) { + RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); + rt_free(rt); + } +} + static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; - struct rtable *orig; oldest = rcu_dereference(hash->chain); for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; @@ -576,11 +591,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) oldest = fnhe; } - orig = rcu_dereference(oldest->fnhe_rth); - if (orig) { - RCU_INIT_POINTER(oldest->fnhe_rth, NULL); - rt_free(orig); - } + fnhe_flush_routes(oldest); return oldest; } @@ -644,7 +655,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_expires = max(1UL, expires); } /* Update all cached dsts too */ - rt = rcu_dereference(fnhe->fnhe_rth); + rt = 
rcu_dereference(fnhe->fnhe_rth_input); + if (rt) + fill_route_from_fnhe(rt, fnhe); + rt = rcu_dereference(fnhe->fnhe_rth_output); if (rt) fill_route_from_fnhe(rt, fnhe); } else { @@ -668,6 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, * stale, so anyone caching it rechecks if this exception * applies to them. */ + rt = rcu_dereference(nh->nh_rth_input); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + for_each_possible_cpu(i) { struct rtable __rcu **prt; prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); @@ -1242,25 +1260,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { + struct rtable __rcu **porig; + struct rtable *orig; int genid = fnhe_genid(dev_net(rt->dst.dev)); - struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); + + if (rt_is_input_route(rt)) + porig = &fnhe->fnhe_rth_input; + else + porig = &fnhe->fnhe_rth_output; + orig = rcu_dereference(*porig); if (fnhe->fnhe_genid != genid) { fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; + fnhe_flush_routes(fnhe); + orig = NULL; } fill_route_from_fnhe(rt, fnhe); if (!rt->rt_gateway) rt->rt_gateway = daddr; - rcu_assign_pointer(fnhe->fnhe_rth, rt); - if (orig) - rt_free(orig); + if (!(rt->dst.flags & DST_NOCACHE)) { + rcu_assign_pointer(*porig, rt); + if (orig) + rt_free(orig); + ret = true; + } fnhe->fnhe_stamp = jiffies; - ret = true; } spin_unlock_bh(&fnhe_lock); @@ -1492,6 +1521,7 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { + struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; @@ -1538,8 +1568,13 @@ static int __mkroute_input(struct sk_buff *skb, } } + fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (fnhe != NULL) + rth = 
rcu_dereference(fnhe->fnhe_rth_input); + else + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1567,7 +1602,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.input = ip_forward; rth->dst.output = ip_output; - rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); + rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1882,7 +1917,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = find_exception(nh, fl4->daddr); if (fnhe) - prth = &fnhe->fnhe_rth; + prth = &fnhe->fnhe_rth_output; else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && -- cgit v1.2.3 From 496e4ae7dc944faa1721bfda7e9d834d5611a874 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Jun 2013 14:15:47 +0200 Subject: netfilter: nf_queue: add NFQA_SKB_CSUM_NOTVERIFIED info flag The common case is that TCP/IP checksums have already been verified, e.g. by hardware (rx checksum offload), or conntrack. Userspace can use this flag to determine when the checksum has not been validated yet. If the flag is set, this doesn't necessarily mean that the packet has an invalid checksum, e.g. if NIC doesn't support rx checksum. Userspace that successfully enabled NFQA_CFG_F_GSO queue feature flag can infer that IP/TCP checksum has already been validated if either the SKB_INFO attribute is not present or the NFQA_SKB_CSUM_NOTVERIFIED flag is unset. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 299a48ae5dc..971ea145ab3 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -280,12 +280,17 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) skb_shinfo(to)->nr_frags = j; } -static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet) +static int +nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, + bool csum_verify) { __u32 flags = 0; if (packet->ip_summed == CHECKSUM_PARTIAL) flags = NFQA_SKB_CSUMNOTREADY; + else if (csum_verify) + flags = NFQA_SKB_CSUM_NOTVERIFIED; + if (skb_is_gso(packet)) flags |= NFQA_SKB_GSO; @@ -310,6 +315,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); + bool csum_verify; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -327,6 +333,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->tstamp.tv64) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + if (entry->hook <= NF_INET_FORWARD || + (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) + csum_verify = !skb_csum_unnecessary(entskb); + else + csum_verify = false; + outdev = entry->outdev; switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { @@ -476,7 +488,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; - if (nfqnl_put_packet_info(skb, entskb)) + if (nfqnl_put_packet_info(skb, entskb, csum_verify)) goto nla_put_failure; if (data_len) { -- cgit v1.2.3 From 
fb825a550a1af75323cee9d62d6fb818384c8c95 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 28 Jun 2013 16:07:40 -0700 Subject: openvswitch: Add Kconfig dependency on GRE-DEMUX. Openvswitch uses function from NET_IPGRE_DEMUX module. Add Kconfig dependency to fix following compilation errors: http://marc.info/?l=linux-netdev&m=137244035226634 CC: Jesse Gross Reported-by: Randy Dunlap Signed-off-by: Pravin Shelar Acked-by: Randy Dunlap Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/openvswitch/Kconfig | 16 ++++++++++++++-- net/openvswitch/vport-gre.c | 5 +++-- net/openvswitch/vport.c | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 9fbc04a31ed..27ee56b688a 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -19,8 +19,6 @@ config OPENVSWITCH which is able to accept configuration from a variety of sources and translate it into packet processing rules. - Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. - See http://openvswitch.org for more information and userspace utilities. @@ -28,3 +26,17 @@ config OPENVSWITCH called openvswitch. If unsure, say N. + +config OPENVSWITCH_GRE + bool "Open vSwitch GRE tunneling support" + depends on INET + depends on OPENVSWITCH + depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m) + default y + ---help--- + If you say Y here, then the Open vSwitch will be able create GRE + vport. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. 
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 943e5c43135..493e9775dcd 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -16,7 +16,7 @@ * 02110-1301, USA */ -#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) +#ifdef CONFIG_OPENVSWITCH_GRE #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include @@ -271,4 +271,5 @@ const struct vport_ops ovs_gre_vport_ops = { .get_name = gre_get_name, .send = gre_tnl_send, }; -#endif + +#endif /* OPENVSWITCH_GRE */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index ba81294219a..d4c7fa04ce0 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -39,7 +39,7 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, -#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) +#ifdef CONFIG_OPENVSWITCH_GRE &ovs_gre_vport_ops, #endif }; -- cgit v1.2.3 From 5c29fb12e8fb8a8105ea048cb160fd79a85a52bb Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Jun 2013 22:46:04 +0200 Subject: ipv6: only apply anti-spoofing checks to not-pointopoint tunnels Because of commit 218774dc341f219bfcf940304a081b121a0e8099 ("ipv6: add anti-spoofing checks for 6to4 and 6rd") the sit driver dropped packets for 2002::/16 destinations and sources even when configured to work as a tunnel with fixed endpoint. We may only apply the 6rd/6to4 anti-spoofing checks if the device is not in pointopoint mode. This was an oversight from me in the above commit, sorry. Thanks to Roman Mamedov for reporting this! Reported-by: Roman Mamedov Cc: David Miller Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 97a0bfe2c29..85ff37b1ce0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -593,7 +593,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel->dev->stats.rx_errors++; goto out; } - } else { + } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { if (is_spoofed_6rd(tunnel, iph->saddr, &ipv6_hdr(skb)->saddr) || is_spoofed_6rd(tunnel, iph->daddr, -- cgit v1.2.3 From 52bd4c0c1551daa2efa7bb9e01a2f4ea6d1311bb Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 28 Jun 2013 17:35:48 +0200 Subject: ipv6: fix ecmp lookup when oif is specified There is no reason to skip ECMP lookup when oif is specified, but this implies to check oif given by user when selecting another route. When the new route does not match oif requirement, we simply keep the initial one. Spotted-by: dingzhi Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/route.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7ca87b37c0e..9ff0b78a9c1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -83,6 +83,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -394,7 +395,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count, } static struct rt6_info *rt6_multipath_select(struct rt6_info *match, - struct flowi6 *fl6) + struct flowi6 *fl6, int oif, + int strict) { struct rt6_info *sibling, *next_sibling; int route_choosen; @@ -408,6 +410,8 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, &match->rt6i_siblings, 
rt6i_siblings) { route_choosen--; if (route_choosen == 0) { + if (rt6_score_route(sibling, oif, strict) < 0) + break; match = sibling; break; } @@ -743,7 +747,7 @@ restart: rt = fn->leaf; rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6); + rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); @@ -875,8 +879,8 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - if (rt->rt6i_nsiblings && oif == 0) - rt = rt6_multipath_select(rt, fl6); + if (rt->rt6i_nsiblings) + rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) -- cgit v1.2.3 From c9ab4d85de222f3390c67aedc9c18a50e767531e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jun 2013 02:37:42 -0700 Subject: neighbour: fix a race in neigh_destroy() There is a race in neighbour code, because neigh_destroy() uses skb_queue_purge(&neigh->arp_queue) without holding neighbour lock, while other parts of the code assume neighbour rwlock is what protects arp_queue Convert all skb_queue_purge() calls to the __skb_queue_purge() variant Use __skb_queue_head_init() instead of skb_queue_head_init() to make clear we do not use arp_queue.lock And hold neigh->lock in neigh_destroy() to close the race. Reported-by: Joe Jin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2569ab2cafb..b7de821f98d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. 
*/ - skb_queue_purge(&n->arp_queue); + __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device if (!n) goto out_entries; - skb_queue_head_init(&n->arp_queue); + __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); seqlock_init(&n->ha_lock); n->updated = n->used = now; @@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh) if (neigh_del_timer(neigh)) pr_warn("Impossible event\n"); - skb_queue_purge(&neigh->arp_queue); + write_lock_bh(&neigh->lock); + __skb_queue_purge(&neigh->arp_queue); + write_unlock_bh(&neigh->lock); neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) @@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh) neigh->ops->error_report(neigh, skb); write_lock(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } @@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, write_lock_bh(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } out: -- cgit v1.2.3 From 442340639194762df7e61e8aabae44a18896eca1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 14 May 2013 16:53:40 -0400 Subject: svcrpc: introduce init_svc_cred Common helper to zero out fields of the svc_cred. Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 29b4ba93ab3..8d7860ee239 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -377,8 +377,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp) new->handle.data = tmp->handle.data; tmp->handle.data = NULL; new->mechctx = NULL; - new->cred.cr_group_info = NULL; - new->cred.cr_principal = NULL; + init_svc_cred(&new->cred); } static void @@ -392,9 +391,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp) memset(&new->seqdata, 0, sizeof(new->seqdata)); spin_lock_init(&new->seqdata.sd_lock); new->cred = tmp->cred; - tmp->cred.cr_group_info = NULL; new->cred.cr_principal = tmp->cred.cr_principal; - tmp->cred.cr_principal = NULL; + init_svc_cred(&tmp->cred); } static struct cache_head * -- cgit v1.2.3 From 0dc1531aca7fd1440918bd55844a054e9c29acad Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 14 May 2013 16:07:13 -0400 Subject: svcrpc: store gss mech in svc_cred Store a pointer to the gss mechanism used in the rq_cred and cl_cred. This will make it easier to enforce SP4_MACH_CRED, which needs to compare the mechanism used on the exchange_id with that used on protected operations. Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/gss_mech_switch.c | 5 ++++- net/sunrpc/auth_gss/svcauth_gss.c | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index defa9d33925..27ce2624093 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -139,11 +139,12 @@ void gss_mech_unregister(struct gss_api_mech *gm) } EXPORT_SYMBOL_GPL(gss_mech_unregister); -static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) +struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) { __module_get(gm->gm_owner); return gm; } +EXPORT_SYMBOL(gss_mech_get); static struct gss_api_mech * _gss_mech_get_by_name(const char *name) @@ -360,6 +361,7 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) } return 0; } +EXPORT_SYMBOL(gss_pseudoflavor_to_service); char * gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) @@ -379,6 +381,7 @@ gss_mech_put(struct gss_api_mech * gm) if (gm) module_put(gm->gm_owner); } +EXPORT_SYMBOL(gss_mech_put); /* The mech could probably be determined from the token instead, but it's just * as easy for now to pass it in. 
*/ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 8d7860ee239..0265bb3a868 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -391,7 +391,6 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp) memset(&new->seqdata, 0, sizeof(new->seqdata)); spin_lock_init(&new->seqdata.sd_lock); new->cred = tmp->cred; - new->cred.cr_principal = tmp->cred.cr_principal; init_svc_cred(&tmp->cred); } @@ -485,7 +484,7 @@ static int rsc_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; - gm = gss_mech_get_by_name(buf); + gm = rsci.cred.cr_gss_mech = gss_mech_get_by_name(buf); status = -EOPNOTSUPP; if (!gm) goto out; @@ -515,7 +514,6 @@ static int rsc_parse(struct cache_detail *cd, rscp = rsc_update(cd, &rsci, rscp); status = 0; out: - gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); -- cgit v1.2.3 From cf3aa02cb4a0c5af5557dd47f15a08a7df33182a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 11:09:06 -0400 Subject: svcrpc: fix handling of too-short rpc's If we detect that an rpc is too short, we abort and close the connection. Except, there's a bug here: we're leaving sk_datalen nonzero without leaving any pages in the sk_pages array. The most likely result of the inconsistency is a subsequent crash in svc_tcp_clear_pages. Also demote the BUG_ON in svc_tcp_clear_pages to a WARN. Cc: stable@kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0f679df7d07..df74919c81c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { - BUG_ON(svsk->sk_pages[i] == NULL); + if (svsk->sk_pages[i] == NULL) { + WARN_ON_ONCE(1); + continue; + } put_page(svsk->sk_pages[i]); svsk->sk_pages[i] = NULL; } @@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) + if (svc_sock_reclen(svsk) < 8) { + svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ + } rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; -- cgit v1.2.3 From 1f691b07c5dc51b2055834f58c0f351defd97f27 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 Jun 2013 10:55:40 -0400 Subject: svcrpc: don't error out on small tcp fragment Though clients we care about mostly don't do this, it is possible for rpc requests to be sent in multiple fragments. Here we have a sanity check to ensure that the final received rpc isn't too small--except that the number we're actually checking is the length of just the final fragment, not of the whole rpc. So a perfectly legal rpc that's unluckily fragmented could cause the server to close the connection here. Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index df74919c81c..305374d4fb9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1095,7 +1095,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) { + if (svsk->sk_datalen < 8) { svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ } -- cgit v1.2.3 From f9e1aedc6c79f18bb56caa3735b94217c4ec1e0c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: sunrpc/cache: remove races with queuing an upcall. We currently queue an upcall after setting CACHE_PENDING, and dequeue after clearing CACHE_PENDING. So a request should only be present when CACHE_PENDING is set. However we don't combine the test and the enqueue/dequeue in a protected region, so it is possible (if unlikely) for a race to result in a request being queued without CACHE_PENDING set, or a request to be absent despite CACHE_PENDING. So: include a test for CACHE_PENDING inside the regions of enqueue and dequeue where queue_lock is held, and abort the operation if the value is not as expected. Also remove the early 'return' from cache_dequeue() to ensure that it always removes all entries: As there is no locking between setting CACHE_PENDING and calling sunrpc_cache_pipe_upcall it is not inconceivable for some other thread to clear CACHE_PENDING and then someone else to set it and call sunrpc_cache_pipe_upcall, both before the original threads completed the call. With this, it is perfectly safe and correct to: - call cache_dequeue() if and only if we have just cleared CACHE_PENDING - call sunrpc_cache_pipe_upcall() (via cache_make_upcall) if and only if we have just set CACHE_PENDING. Reported-by: Bodo Stroesser Signed-off-by: NeilBrown Signed-off-by: Bodo Stroesser Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 3b3f14fc02c..8e964ae8d7b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1036,23 +1036,32 @@ static int cache_release(struct inode *inode, struct file *filp, static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { - struct cache_queue *cq; + struct cache_queue *cq, *tmp; + struct cache_request *cr; + struct list_head dequeued; + + INIT_LIST_HEAD(&dequeued); spin_lock(&queue_lock); - list_for_each_entry(cq, &detail->queue, list) + list_for_each_entry_safe(cq, tmp, &detail->queue, list) if (!cq->reader) { - struct cache_request *cr = container_of(cq, struct cache_request, q); + cr = container_of(cq, struct cache_request, q); if (cr->item != ch) continue; + if (test_bit(CACHE_PENDING, &ch->flags)) + /* Lost a race and it is pending again */ + break; if (cr->readers != 0) continue; - list_del(&cr->q.list); - spin_unlock(&queue_lock); - cache_put(cr->item, detail); - kfree(cr->buf); - kfree(cr); - return; + list_move(&cr->q.list, &dequeued); } spin_unlock(&queue_lock); + while (!list_empty(&dequeued)) { + cr = list_entry(dequeued.next, struct cache_request, q.list); + list_del(&cr->q.list); + cache_put(cr->item, detail); + kfree(cr->buf); + kfree(cr); + } } /* @@ -1166,6 +1175,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) char *buf; struct cache_request *crq; + int ret = 0; if (!detail->cache_request) return -EINVAL; @@ -1191,10 +1201,18 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); - list_add_tail(&crq->q.list, &detail->queue); + if (test_bit(CACHE_PENDING, &h->flags)) + list_add_tail(&crq->q.list, &detail->queue); + else + /* Lost a race, no longer PENDING, so don't enqueue */ + ret = -EAGAIN; 
spin_unlock(&queue_lock); wake_up(&queue_wait); - return 0; + if (ret == -EAGAIN) { + kfree(buf); + kfree(crq); + } + return ret; } EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); -- cgit v1.2.3 From 2a1c7f53fd31e46f51780b61eb99fffef4c3c5a6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: sunrpc/cache: use cache_fresh_unlocked consistently and correctly. cache_fresh_unlocked() is called when a cache entry has been updated and ensures that if there were any pending upcalls, they are cleared. So every time we update a cache entry, we should call this, and this should be the only way that we try to clear pending calls (that sort of uniformity makes code sooo much easier to read). try_to_negate_entry() will (possibly) mark an entry as negative. If it doesn't, it is because the entry already is VALID. So the entry will be valid on exit, so it is appropriate to call cache_fresh_unlocked(). So tidy up try_to_negate_entry() to do that, and remove partial open-coded cache_fresh_unlocked() from the one call-site of try_to_negate_entry(). In the other branch of the 'switch(cache_make_upcall())', we again have a partial open-coded version of cache_fresh_unlocked(). Replace that with a real call. And again in cache_clean(), use a real call to cache_fresh_unlocked(). These call sites might previously have called cache_revisit_request() if CACHE_PENDING wasn't set. This is never necessary because cache_revisit_request() can only do anything if the item is in the cache_defer_hash, However any time that an item is added to the cache_defer_hash (setup_deferral), the code immediately tests CACHE_PENDING, and removes the entry again if it is clear. So all other places we only need to 'cache_revisit_request' if we've just cleared CACHE_PENDING. Reported-by: Bodo Stroesser Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8e964ae8d7b..29c463396a2 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -228,15 +228,14 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h write_lock(&detail->hash_lock); rv = cache_is_valid(h); - if (rv != -EAGAIN) { - write_unlock(&detail->hash_lock); - return rv; + if (rv == -EAGAIN) { + set_bit(CACHE_NEGATIVE, &h->flags); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + rv = -ENOENT; } - set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); write_unlock(&detail->hash_lock); cache_fresh_unlocked(h, detail); - return -ENOENT; + return rv; } /* @@ -275,13 +274,10 @@ int cache_check(struct cache_detail *detail, if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { switch (cache_make_upcall(detail, h)) { case -EINVAL: - clear_bit(CACHE_PENDING, &h->flags); - cache_revisit_request(h); rv = try_to_negate_entry(detail, h); break; case -EAGAIN: - clear_bit(CACHE_PENDING, &h->flags); - cache_revisit_request(h); + cache_fresh_unlocked(h, detail); break; } } @@ -457,9 +453,7 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) { - if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) - cache_dequeue(current_detail, ch); - cache_revisit_request(ch); + cache_fresh_unlocked(ch, d); cache_put(ch, d); } } else -- cgit v1.2.3 From 013920eb5db97e99a4c30c8400f1c616e2a8b0a2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: sunrpc/cache: ensure items removed from cache do not have pending upcalls. It is possible for a race to set CACHE_PENDING after cache_clean() has removed a cache entry from the cache. 
If CACHE_PENDING is still set when the entry is finally 'put', the cache_dequeue() will never happen and we can leak memory. So set a new flag 'CACHE_CLEANED' when we remove something from the cache, and don't queue any upcall if it is set. If CACHE_PENDING is set before CACHE_CLEANED, the call that cache_clean() makes to cache_fresh_unlocked() will free memory as needed. If CACHE_PENDING is set after CACHE_CLEANED, the test in sunrpc_cache_pipe_upcall will ensure that the memory is not allocated. Reported-by: Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 29c463396a2..b12144c5edd 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -306,7 +306,7 @@ EXPORT_SYMBOL_GPL(cache_check); * a current pointer into that list and into the table * for that entry. * - * Each time clean_cache is called it finds the next non-empty entry + * Each time cache_clean is called it finds the next non-empty entry * in the current table and walks the list in that entry * looking for entries that can be removed. * @@ -453,6 +453,7 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) { + set_bit(CACHE_CLEANED, &ch->flags); cache_fresh_unlocked(ch, d); cache_put(ch, d); } @@ -1178,6 +1179,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) warn_no_listener(detail); return -EINVAL; } + if (test_bit(CACHE_CLEANED, &h->flags)) + /* Too late to make an upcall */ + return -EAGAIN; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) -- cgit v1.2.3 From 7715cde86857d4bb40f43f1ee971cf906eaf1b9c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: net/sunrpc: xpt_auth_cache should be ignored when expired. 
commit d202cce8963d9268ff355a386e20243e8332b308 sunrpc: never return expired entries in sunrpc_cache_lookup moved the 'entry is expired' test from cache_check to sunrpc_cache_lookup, so that it happened early and some races could safely be ignored. However the ip_map (in svcauth_unix.c) has a separate single-item cache which allows quick lookup without locking. An entry in this case would not be subject to the expiry test and so could be used well after it has expired. This is not normally a big problem because the first time it is used after it is expired an up-call will be scheduled to refresh the entry (if it hasn't been scheduled already) and the old entry will then be invalidated. So on the second attempt to use it after it has expired, ip_map_cached_get will discard it. However that is subtle and not ideal, so replace the "!cache_valid" test with "cache_is_expired". In doing this we drop the test on the "CACHE_VALID" bit. This is unnecessary as the bit is never cleared, and an entry will only be cached if the bit is set. Reported-by: Bodo Stroesser Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- net/sunrpc/cache.c | 6 ------ net/sunrpc/svcauth_unix.c | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b12144c5edd..5478a015ab0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -50,12 +50,6 @@ static void cache_init(struct cache_head *h) h->last_refresh = now; } -static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) -{ - return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); -} - struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 06bdf5a1082..a98853dfccd 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -347,13 +347,13 @@ ip_map_cached_get(struct svc_xprt *xprt) spin_lock(&xprt->xpt_lock); ipm = xprt->xpt_auth_cache; if (ipm != NULL) { - if (!cache_valid(&ipm->h)) { + sn = net_generic(xprt->xpt_net, sunrpc_net_id); + if (cache_is_expired(sn->ip_map_cache, &ipm->h)) { /* * The entry has been invalidated since it was * remembered, e.g. by a second mount from the * same IP address. */ - sn = net_generic(xprt->xpt_net, sunrpc_net_id); xprt->xpt_auth_cache = NULL; spin_unlock(&xprt->xpt_lock); cache_put(&ipm->h, sn->ip_map_cache); -- cgit v1.2.3 From 0bebc633f1428163c9659fd16b34c745e60a0757 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jun 2013 12:53:42 +1000 Subject: sunrpc: Don't schedule an upcall on a replaced cache entry. When a cache entry is replaced, the "expiry_time" gets set to zero by a call to "cache_fresh_locked(..., 0)" at the end of "sunrpc_cache_update". This low expiry time makes cache_check() think that the 'refresh_age' is negative, so the 'age' is comparatively large and a refresh is triggered. However refreshing a replaced entry is pointless, it cannot achieve anything useful. 
So teach cache_check to ignore a low refresh_age when expiry_time is zero. Reported-by: Bodo Stroesser Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 5478a015ab0..49eb37010aa 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -262,7 +262,8 @@ int cache_check(struct cache_detail *detail, if (rqstp == NULL) { if (rv == -EAGAIN) rv = -ENOENT; - } else if (rv == -EAGAIN || age > refresh_age/2) { + } else if (rv == -EAGAIN || + (h->expiry_time != 0 && age > refresh_age/2)) { dprintk("RPC: Want update, refage=%ld, age=%ld\n", refresh_age, age); if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { -- cgit v1.2.3 From aec0a40a6f78843c0ce73f7398230ee5184f896d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jun 2013 07:40:57 -0700 Subject: netem: use rb tree to implement the time queue Following typical setup to implement a ~100 ms RTT and big amount of reorders has very poor performance because netem implements the time queue using a linked list. ----------------------------------------------------------- ETH=eth0 IFB=ifb0 modprobe ifb ip link set dev $IFB up tc qdisc add dev $ETH ingress 2>/dev/null tc filter add dev $ETH parent ffff: \ protocol ip u32 match u32 0 0 flowid 1:1 action mirred egress \ redirect dev $IFB ethtool -K $ETH gro off tso off gso off tc qdisc add dev $IFB root netem delay 50ms 10ms limit 100000 tc qd add dev $ETH root netem delay 50ms limit 100000 --------------------------------------------------------- Switch netem time queue to a rb tree, so this kind of setup can work at high speed. Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/sch_netem.c | 109 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3d2acc7a9c8..ed0082cf8ef 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -68,7 +69,8 @@ */ struct netem_sched_data { - /* internal t(ime)fifo qdisc uses sch->q and sch->limit */ + /* internal t(ime)fifo qdisc uses t_root and sch->limit */ + struct rb_root t_root; /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; @@ -128,10 +130,35 @@ struct netem_sched_data { */ struct netem_skb_cb { psched_time_t time_to_send; + ktime_t tstamp_save; }; +/* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp + * to hold a rb_node structure. + * + * If struct sk_buff layout is changed, the following checks will complain. + */ +static struct rb_node *netem_rb_node(struct sk_buff *skb) +{ + BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0); + BUILD_BUG_ON(offsetof(struct sk_buff, prev) != + offsetof(struct sk_buff, next) + sizeof(skb->next)); + BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) != + offsetof(struct sk_buff, prev) + sizeof(skb->prev)); + BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) + + sizeof(skb->prev) + + sizeof(skb->tstamp)); + return (struct rb_node *)&skb->next; +} + +static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) +{ + return (struct sk_buff *)rb; +} + static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { + /* we assume we can use skb next/prev/tstamp as storage for rb_node */ qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; } @@ -333,20 +360,23 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { - 
struct sk_buff_head *list = &sch->q; + struct netem_sched_data *q = qdisc_priv(sch); psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; - struct sk_buff *skb = skb_peek_tail(list); + struct rb_node **p = &q->t_root.rb_node, *parent = NULL; - /* Optimize for add at tail */ - if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) - return __skb_queue_tail(list, nskb); + while (*p) { + struct sk_buff *skb; - skb_queue_reverse_walk(list, skb) { + parent = *p; + skb = netem_rb_to_skb(parent); if (tnext >= netem_skb_cb(skb)->time_to_send) - break; + p = &parent->rb_right; + else + p = &parent->rb_left; } - - __skb_queue_after(list, skb, nskb); + rb_link_node(netem_rb_node(nskb), parent, p); + rb_insert_color(netem_rb_node(nskb), &q->t_root); + sch->q.qlen++; } /* @@ -436,23 +466,28 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = psched_get_time(); if (q->rate) { - struct sk_buff_head *list = &sch->q; + struct sk_buff *last; - if (!skb_queue_empty(list)) { + if (!skb_queue_empty(&sch->q)) + last = skb_peek_tail(&sch->q); + else + last = netem_rb_to_skb(rb_last(&q->t_root)); + if (last) { /* * Last packet in queue is reference point (now), * calculate this time bonus and subtract * from delay. 
*/ - delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now; + delay -= netem_skb_cb(last)->time_to_send - now; delay = max_t(psched_tdiff_t, 0, delay); - now = netem_skb_cb(skb_peek_tail(list))->time_to_send; + now = netem_skb_cb(last)->time_to_send; } delay += packet_len_2_sched_time(skb->len, q); } cb->time_to_send = now + delay; + cb->tstamp_save = skb->tstamp; ++q->counter; tfifo_enqueue(skb, sch); } else { @@ -476,6 +511,21 @@ static unsigned int netem_drop(struct Qdisc *sch) unsigned int len; len = qdisc_queue_drop(sch); + + if (!len) { + struct rb_node *p = rb_first(&q->t_root); + + if (p) { + struct sk_buff *skb = netem_rb_to_skb(p); + + rb_erase(p, &q->t_root); + sch->q.qlen--; + skb->next = NULL; + skb->prev = NULL; + len = qdisc_pkt_len(skb); + kfree_skb(skb); + } + } if (!len && q->qdisc && q->qdisc->ops->drop) len = q->qdisc->ops->drop(q->qdisc); if (len) @@ -488,19 +538,32 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; + struct rb_node *p; if (qdisc_is_throttled(sch)) return NULL; tfifo_dequeue: - skb = qdisc_peek_head(sch); + skb = __skb_dequeue(&sch->q); if (skb) { - const struct netem_skb_cb *cb = netem_skb_cb(skb); +deliver: + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); + return skb; + } + p = rb_first(&q->t_root); + if (p) { + skb = netem_rb_to_skb(p); /* if more time remaining? 
*/ - if (cb->time_to_send <= psched_get_time()) { - __skb_unlink(skb, &sch->q); - sch->qstats.backlog -= qdisc_pkt_len(skb); + if (netem_skb_cb(skb)->time_to_send <= psched_get_time()) { + rb_erase(p, &q->t_root); + + sch->q.qlen--; + skb->next = NULL; + skb->prev = NULL; + skb->tstamp = netem_skb_cb(skb)->tstamp_save; #ifdef CONFIG_NET_CLS_ACT /* @@ -522,10 +585,7 @@ tfifo_dequeue: } goto tfifo_dequeue; } -deliver: - qdisc_unthrottled(sch); - qdisc_bstats_update(sch, skb); - return skb; + goto deliver; } if (q->qdisc) { @@ -533,7 +593,8 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); + qdisc_watchdog_schedule(&q->watchdog, + netem_skb_cb(skb)->time_to_send); } if (q->qdisc) { -- cgit v1.2.3 From 4ccb93ce7439b63c31bc7597bfffd13567fa483d Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 28 Jun 2013 12:13:52 -0400 Subject: x25: Fix broken locking in ioctl error paths. Two of the x25 ioctl cases have error paths that break out of the function without unlocking the socket, leading to this warning: ================================================ [ BUG: lock held when returning to user space! ] 3.10.0-rc7+ #36 Not tainted ------------------------------------------------ trinity-child2/31407 is leaving the kernel with locks still held! 1 lock held by trinity-child2/31407: #0: (sk_lock-AF_X25){+.+.+.}, at: [] x25_ioctl+0x8a/0x740 [x25] Signed-off-by: Dave Jones Signed-off-by: David S. 
Miller --- net/x25/af_x25.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 1d964e23853..45a3ab5612c 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1583,11 +1583,11 @@ out_cud_release: case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) - break; - clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + if (sk->sk_state == TCP_CLOSE) { + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + rc = 0; + } release_sock(sk); - rc = 0; break; } @@ -1595,14 +1595,15 @@ out_cud_release: rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) - break; + goto out_sendcallaccpt_release; /* must call accptapprv above */ if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) - break; + goto out_sendcallaccpt_release; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; - release_sock(sk); rc = 0; +out_sendcallaccpt_release: + release_sock(sk); break; } -- cgit v1.2.3 From bb33381d0c97cdee25f2cdab540b6e2bd16fa03b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 28 Jun 2013 19:49:40 +0200 Subject: net: sctp: rework debugging framework to use pr_debug and friends We should get rid of all own SCTP debug printk macros and use the ones that the kernel offers anyway instead. This makes the code more readable and conform to the kernel code, and offers all the features of dynamic debugging that pr_debug() et al has, such as only turning on/off portions of debug messages at runtime through debugfs. The runtime cost of having CONFIG_DYNAMIC_DEBUG enabled, but none of the debug statements printing, is negligible [1]. If kernel debugging is completely turned off, then these statements will also compile into "empty" functions. While we're at it, we also need to change the Kconfig option as it /now/ only refers to the ifdef'ed code portions in outqueue.c that enable further debugging/tracing of SCTP transaction fields. 
Also, since SCTP_ASSERT code was enabled with this Kconfig option and has now been removed, we transform those code parts into WARNs resp. where appropriate BUG_ONs so that those bugs can be more easily detected as probably not many people have SCTP debugging permanently turned on. To turn on all SCTP debugging, the following steps are needed: # mount -t debugfs none /sys/kernel/debug # echo -n 'module sctp +p' > /sys/kernel/debug/dynamic_debug/control This can be done more fine-grained on a per file, per line basis and others as described in [2]. [1] https://www.kernel.org/doc/ols/2009/ols2009-pages-39-46.pdf [2] Documentation/dynamic-debug-howto.txt Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/Kconfig | 12 ++-- net/sctp/associola.c | 67 ++++++++++--------- net/sctp/chunk.c | 5 +- net/sctp/debug.c | 4 -- net/sctp/endpointola.c | 5 +- net/sctp/input.c | 6 +- net/sctp/inqueue.c | 9 ++- net/sctp/ipv6.c | 21 +++--- net/sctp/output.c | 40 +++++------ net/sctp/outqueue.c | 160 ++++++++++++++++++++------------------------ net/sctp/protocol.c | 48 +++++++------- net/sctp/sm_make_chunk.c | 25 +++---- net/sctp/sm_sideeffect.c | 96 +++++++++++---------------- net/sctp/sm_statefuns.c | 77 +++++++++++++--------- net/sctp/socket.c | 168 ++++++++++++++++++++++------------------------- net/sctp/transport.c | 49 +++++++------- 16 files changed, 383 insertions(+), 409 deletions(-) (limited to 'net') diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index cf4852814e0..d80bf1aebae 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -30,7 +30,8 @@ menuconfig IP_SCTP homing at either or both ends of an association." To compile this protocol support as a module, choose M here: the - module will be called sctp. + module will be called sctp. Debug messages are handeled by the + kernel's dynamic debugging framework. If in doubt, say N. 
@@ -48,13 +49,14 @@ config NET_SCTPPROBE To compile this code as a module, choose M here: the module will be called sctp_probe. -config SCTP_DBG_MSG - bool "SCTP: Debug messages" +config SCTP_DBG_TSNS + bool "SCTP: Debug transactions" help - If you say Y, this will enable verbose debugging messages. + If you say Y, this will enable transaction debugging, visible + from the kernel's dynamic debugging framework. If unsure, say N. However, if you are running into problems, use - this option to gather detailed trace information + this option to gather outqueue trace information. config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 9a383a8774e..bce5b79662a 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -357,7 +357,8 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, goto fail_init; SCTP_DBG_OBJCNT_INC(assoc); - SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); + + pr_debug("Created asoc %p\n", asoc); return asoc; @@ -455,7 +456,10 @@ void sctp_association_free(struct sctp_association *asoc) /* Cleanup and free up an association. */ static void sctp_association_destroy(struct sctp_association *asoc) { - SCTP_ASSERT(asoc->base.dead, "Assoc is not dead", return); + if (unlikely(!asoc->base.dead)) { + WARN(1, "Attempt to destroy undead association %p!\n", asoc); + return; + } sctp_endpoint_put(asoc->ep); sock_put(asoc->base.sk); @@ -536,11 +540,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, struct list_head *pos; struct sctp_transport *transport; - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_rm_peer:association %p addr: ", - " port: %d\n", - asoc, - (&peer->ipaddr), - ntohs(peer->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p addr:%pISpc\n", + __func__, asoc, &peer->ipaddr.sa); /* If we are to remove the current retran_path, update it * to the next peer before removing this peer from the list. 
@@ -636,12 +637,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* AF_INET and AF_INET6 share common port field. */ port = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ", - " port: %d state:%d\n", - asoc, - addr, - port, - peer_state); + pr_debug("%s: association:%p addr:%pISpc state:%d\n", __func__, + asoc, &addr->sa, peer_state); /* Set the port if it has not been set yet. */ if (0 == asoc->peer.port) @@ -708,8 +705,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, else asoc->pathmtu = peer->pathmtu; - SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " - "%d\n", asoc, asoc->pathmtu); + pr_debug("%s: association:%p PMTU set to %d\n", __func__, asoc, + asoc->pathmtu); + peer->pmtu_pending = 0; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); @@ -1349,12 +1347,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) else t = asoc->peer.retran_path; - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" - " %p addr: ", - " port: %d\n", - asoc, - (&t->ipaddr), - ntohs(t->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc, + &t->ipaddr.sa); } /* Choose the transport for sending retransmit packet. */ @@ -1401,8 +1395,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) asoc->frag_point = sctp_frag_point(asoc, pmtu); } - SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", - __func__, asoc, asoc->pathmtu, asoc->frag_point); + pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc, + asoc->pathmtu, asoc->frag_point); } /* Should we send a SACK to update our peer? 
*/ @@ -1454,9 +1448,9 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) asoc->rwnd_press -= change; } - SCTP_DEBUG_PRINTK("%s: asoc %p rwnd increased by %d to (%u, %u) " - "- %u\n", __func__, asoc, len, asoc->rwnd, - asoc->rwnd_over, asoc->a_rwnd); + pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->a_rwnd); /* Send a window update SACK if the rwnd has increased by at least the * minimum of the association's PMTU and half of the receive buffer. @@ -1465,9 +1459,11 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) */ if (sctp_peer_needs_update(asoc)) { asoc->a_rwnd = asoc->rwnd; - SCTP_DEBUG_PRINTK("%s: Sending window update SACK- asoc: %p " - "rwnd: %u a_rwnd: %u\n", __func__, - asoc, asoc->rwnd, asoc->a_rwnd); + + pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u " + "a_rwnd:%u\n", __func__, asoc, asoc->rwnd, + asoc->a_rwnd); + sack = sctp_make_sack(asoc); if (!sack) return; @@ -1489,8 +1485,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) int rx_count; int over = 0; - SCTP_ASSERT(asoc->rwnd, "rwnd zero", return); - SCTP_ASSERT(!asoc->rwnd_over, "rwnd_over not zero", return); + if (unlikely(!asoc->rwnd || asoc->rwnd_over)) + pr_debug("%s: association:%p has asoc->rwnd:%u, " + "asoc->rwnd_over:%u!\n", __func__, asoc, + asoc->rwnd, asoc->rwnd_over); if (asoc->ep->rcvbuf_policy) rx_count = atomic_read(&asoc->rmem_alloc); @@ -1515,9 +1513,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) asoc->rwnd_over = len - asoc->rwnd; asoc->rwnd = 0; } - SCTP_DEBUG_PRINTK("%s: asoc %p rwnd decreased by %d to (%u, %u, %u)\n", - __func__, asoc, len, asoc->rwnd, - asoc->rwnd_over, asoc->rwnd_press); + + pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->rwnd_press); } /* Build the bind address list 
for the association based on info from the diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7135fc0c087..5780565f5b7 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -193,8 +193,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg->expires_at = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); msg->can_abandon = 1; - SCTP_DEBUG_PRINTK("%s: msg:%p expires_at: %ld jiffies:%ld\n", - __func__, msg, msg->expires_at, jiffies); + + pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__, + msg, msg->expires_at, jiffies); } /* This is the biggest possible DATA chunk that can fit into diff --git a/net/sctp/debug.c b/net/sctp/debug.c index ec997cfe0a7..f4998780d6d 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -47,10 +47,6 @@ #include -#if SCTP_DEBUG -int sctp_debug_flag = 1; /* Initially enable DEBUG */ -#endif /* SCTP_DEBUG */ - /* These are printable forms of Chunk ID's from section 3.1. */ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { "DATA", diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index b26999d508b..9e3d257de0e 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -249,7 +249,10 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; - SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); + if (unlikely(!ep->base.dead)) { + WARN(1, "Attempt to destroy undead endpoint %p!\n", ep); + return; + } /* Free the digest buffer */ kfree(ep->digest); diff --git a/net/sctp/input.c b/net/sctp/input.c index 4cfc74699a3..3fa4d858c35 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -454,8 +454,6 @@ void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t) { - SCTP_DEBUG_PRINTK("%s\n", __func__); - if (sock_owned_by_user(sk)) { if (timer_pending(&t->proto_unreach_timer)) return; @@ -464,10 +462,12 @@ void sctp_icmp_proto_unreachable(struct sock *sk, jiffies + (HZ/20))) 
sctp_association_hold(asoc); } - } else { struct net *net = sock_net(sk); + pr_debug("%s: unrecognized next header type " + "encountered!\n", __func__); + if (del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 3221d073448..cb25f040fed 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -219,10 +219,10 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk->end_of_packet = 1; } - SCTP_DEBUG_PRINTK("+++sctp_inq_pop+++ chunk %p[%s]," - " length %d, skb->len %d\n",chunk, - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), - ntohs(chunk->chunk_hdr->length), chunk->skb->len); + pr_debug("+++sctp_inq_pop+++ chunk:%p[%s], length:%d, skb->len:%d\n", + chunk, sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), + ntohs(chunk->chunk_hdr->length), chunk->skb->len); + return chunk; } @@ -238,4 +238,3 @@ void sctp_inq_set_th_handler(struct sctp_inq *q, work_func_t callback) { INIT_WORK(&q->immediate, callback); } - diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index adeaa0e64f5..09ffcc912d2 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -239,9 +239,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) fl6.daddr = *rt0->addr; } - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", - __func__, skb, skb->len, - &fl6.saddr, &fl6.daddr); + pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, + skb->len, &fl6.saddr, &fl6.daddr); SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); @@ -276,7 +275,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) fl6->flowi6_oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6->daddr); + pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr); if (asoc) fl6->fl6_sport = htons(asoc->base.bind_addr.port); @@ -284,7 +283,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union 
sctp_addr *saddr, if (saddr) { fl6->saddr = saddr->v6.sin6_addr; fl6->fl6_sport = saddr->v6.sin6_port; - SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr); + + pr_debug("src=%pI6 - ", &fl6->saddr); } dst = ip6_dst_lookup_flow(sk, fl6, NULL, false); @@ -348,13 +348,16 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, out: if (!IS_ERR_OR_NULL(dst)) { struct rt6_info *rt; + rt = (struct rt6_info *)dst; t->dst = dst; - SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n", - &rt->rt6i_dst.addr, &fl6->saddr); + + pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, + &fl6->saddr); } else { t->dst = NULL; - SCTP_DEBUG_PRINTK("NO ROUTE\n"); + + pr_debug("no route\n"); } } @@ -377,7 +380,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, struct flowi6 *fl6 = &fl->u.ip6; union sctp_addr *saddr = &t->saddr; - SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); + pr_debug("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); if (t->dst) { saddr->v6.sin6_family = AF_INET6; diff --git a/net/sctp/output.c b/net/sctp/output.c index bbef4a7a9b5..a46d1eb4176 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -93,8 +93,7 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, { struct sctp_chunk *chunk = NULL; - SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, - packet, vtag); + pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); packet->vtag = vtag; @@ -119,8 +118,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, struct sctp_association *asoc = transport->asoc; size_t overhead; - SCTP_DEBUG_PRINTK("%s: packet:%p transport:%p\n", __func__, - packet, transport); + pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport); packet->transport = transport; packet->source_port = sport; @@ -145,7 +143,7 @@ void sctp_packet_free(struct sctp_packet *packet) { struct sctp_chunk *chunk, *tmp; - SCTP_DEBUG_PRINTK("%s: packet:%p\n", __func__, packet); + pr_debug("%s: 
packet:%p\n", __func__, packet); list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); @@ -167,8 +165,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, sctp_xmit_t retval; int error = 0; - SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, - packet, chunk); + pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: @@ -334,8 +331,7 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, { sctp_xmit_t retval = SCTP_XMIT_OK; - SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, packet, - chunk); + pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); /* Data chunks are special. Before seeing what else we can * bundle into this packet, check to see if we are allowed to @@ -402,7 +398,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) unsigned char *auth = NULL; /* pointer to auth in skb data */ __u32 cksum_buf_len = sizeof(struct sctphdr); - SCTP_DEBUG_PRINTK("%s: packet:%p\n", __func__, packet); + pr_debug("%s: packet:%p\n", __func__, packet); /* Do NOT generate a chunkless packet. */ if (list_empty(&packet->chunk_list)) @@ -472,7 +468,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) * * [This whole comment explains WORD_ROUND() below.] */ - SCTP_DEBUG_PRINTK("***sctp_transmit_packet***\n"); + + pr_debug("***sctp_transmit_packet***\n"); + list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { @@ -505,16 +503,13 @@ int sctp_packet_transmit(struct sctp_packet *packet) memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); - SCTP_DEBUG_PRINTK("%s %p[%s] %s 0x%x, %s %d, %s %d, %s %d\n", - "*** Chunk", chunk, - sctp_cname(SCTP_ST_CHUNK( - chunk->chunk_hdr->type)), - chunk->has_tsn ? "TSN" : "No TSN", - chunk->has_tsn ? 
- ntohl(chunk->subh.data_hdr->tsn) : 0, - "length", ntohs(chunk->chunk_hdr->length), - "chunk->skb->len", chunk->skb->len, - "rtt_in_progress", chunk->rtt_in_progress); + pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, " + "rtt_in_progress:%d\n", chunk, + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), + chunk->has_tsn ? "TSN" : "No TSN", + chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, + ntohs(chunk->chunk_hdr->length), chunk->skb->len, + chunk->rtt_in_progress); /* * If this is a control chunk, this is our last @@ -606,8 +601,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } } - SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", - nskb->len); + pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); nskb->local_df = packet->ipfragok; (*tp->af_specific->sctp_xmit)(nskb, tp); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index be35e2dbcc9..511b3b35d60 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -299,10 +299,10 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) struct net *net = sock_net(q->asoc->base.sk); int error = 0; - SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n", - q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) - : "Illegal Chunk"); + pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, + chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); /* If it is data, queue it up, otherwise, send it * immediately. @@ -328,10 +328,10 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) break; default: - SCTP_DEBUG_PRINTK("outqueueing (%p, %p[%s])\n", - q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) - : "Illegal Chunk"); + pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? 
+ sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) @@ -460,14 +460,10 @@ void sctp_retransmit_mark(struct sctp_outq *q, } } - SCTP_DEBUG_PRINTK("%s: transport: %p, reason: %d, " - "cwnd: %d, ssthresh: %d, flight_size: %d, " - "pba: %d\n", __func__, - transport, reason, - transport->cwnd, transport->ssthresh, - transport->flight_size, - transport->partial_bytes_acked); - + pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, transport, reason, + transport->cwnd, transport->ssthresh, transport->flight_size, + transport->partial_bytes_acked); } /* Mark all the eligible packets on a transport for retransmission and force @@ -1014,19 +1010,13 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_transport_burst_limited(transport); } - SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", - q, chunk, - chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK( - chunk->chunk_hdr->type)) - : "Illegal Chunk"); - - SCTP_DEBUG_PRINTK("TX TSN 0x%x skb->head " - "%p skb->users %d.\n", - ntohl(chunk->subh.data_hdr->tsn), - chunk->skb ?chunk->skb->head : NULL, - chunk->skb ? - atomic_read(&chunk->skb->users) : -1); + pr_debug("%s: outq:%p, chunk:%p[%s], tx-tsn:0x%x skb->head:%p " + "skb->users:%d\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk", ntohl(chunk->subh.data_hdr->tsn), + chunk->skb ? chunk->skb->head : NULL, chunk->skb ? + atomic_read(&chunk->skb->users) : -1); /* Add the chunk to the packet. */ status = sctp_packet_transmit_chunk(packet, chunk, 0); @@ -1038,10 +1028,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) /* We could not append this chunk, so put * the chunk back on the output queue. 
*/ - SCTP_DEBUG_PRINTK("sctp_outq_flush: could " - "not transmit TSN: 0x%x, status: %d\n", - ntohl(chunk->subh.data_hdr->tsn), - status); + pr_debug("%s: could not transmit tsn:0x%x, status:%d\n", + __func__, ntohl(chunk->subh.data_hdr->tsn), + status); + sctp_outq_head_data(q, chunk); goto sctp_flush_out; break; @@ -1284,11 +1274,10 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) sctp_generate_fwdtsn(q, sack_ctsn); - SCTP_DEBUG_PRINTK("%s: sack Cumulative TSN Ack is 0x%x.\n", - __func__, sack_ctsn); - SCTP_DEBUG_PRINTK("%s: Cumulative TSN Ack of association, " - "%p is 0x%x. Adv peer ack point: 0x%x\n", - __func__, asoc, ctsn, asoc->adv_peer_ack_point); + pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn); + pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, " + "advertised peer ack point:0x%x\n", __func__, asoc, ctsn, + asoc->adv_peer_ack_point); /* See if all chunks are acked. * Make sure the empty queue handler will get run later. @@ -1304,7 +1293,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) goto finish; } - SCTP_DEBUG_PRINTK("sack queue is empty.\n"); + pr_debug("%s: sack queue is empty\n", __func__); finish: return q->empty; } @@ -1348,7 +1337,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* These state variables are for coherent debug output. --xguo */ -#if SCTP_DEBUG +#ifdef CONFIG_SCTP_DBG_TSNS __u32 dbg_ack_tsn = 0; /* An ACKed TSN range starts here... */ __u32 dbg_last_ack_tsn = 0; /* ...and finishes here. */ __u32 dbg_kept_tsn = 0; /* An un-ACKed range starts here... */ @@ -1359,7 +1348,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * -1: We need to initialize. 
*/ int dbg_prt_state = -1; -#endif /* SCTP_DEBUG */ +#endif /* CONFIG_SCTP_DBG_TSNS */ sack_ctsn = ntohl(sack->cum_tsn_ack); @@ -1483,7 +1472,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, list_add_tail(lchunk, &tlist); } -#if SCTP_DEBUG +#ifdef CONFIG_SCTP_DBG_TSNS switch (dbg_prt_state) { case 0: /* last TSN was ACKed */ if (dbg_last_ack_tsn + 1 == tsn) { @@ -1497,42 +1486,39 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Display the end of the * current range. */ - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_ack_tsn); + pr_cont("-%08x", dbg_last_ack_tsn); } /* Start a new range. */ - SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); + pr_cont(",%08x", tsn); dbg_ack_tsn = tsn; break; case 1: /* The last TSN was NOT ACKed. */ if (dbg_last_kept_tsn != dbg_kept_tsn) { /* Display the end of current range. */ - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_kept_tsn); + pr_cont("-%08x", dbg_last_kept_tsn); } - SCTP_DEBUG_PRINTK_CONT("\n"); - + pr_cont("\n"); /* FALL THROUGH... */ default: /* This is the first-ever TSN we examined. */ /* Start a new range of ACK-ed TSNs. */ - SCTP_DEBUG_PRINTK("ACKed: %08x", tsn); + pr_debug("ACKed: %08x", tsn); + dbg_prt_state = 0; dbg_ack_tsn = tsn; } dbg_last_ack_tsn = tsn; -#endif /* SCTP_DEBUG */ +#endif /* CONFIG_SCTP_DBG_TSNS */ } else { if (tchunk->tsn_gap_acked) { - SCTP_DEBUG_PRINTK("%s: Receiver reneged on " - "data TSN: 0x%x\n", - __func__, - tsn); + pr_debug("%s: receiver reneged on data TSN:0x%x\n", + __func__, tsn); + tchunk->tsn_gap_acked = 0; if (tchunk->transport) @@ -1552,7 +1538,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, list_add_tail(lchunk, &tlist); -#if SCTP_DEBUG +#ifdef CONFIG_SCTP_DBG_TSNS /* See the above comments on ACK-ed TSNs. 
*/ switch (dbg_prt_state) { case 1: @@ -1560,50 +1546,47 @@ static void sctp_check_transmitted(struct sctp_outq *q, break; if (dbg_last_kept_tsn != dbg_kept_tsn) - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_kept_tsn); + pr_cont("-%08x", dbg_last_kept_tsn); - SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); + pr_cont(",%08x", tsn); dbg_kept_tsn = tsn; break; case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_ack_tsn); - SCTP_DEBUG_PRINTK_CONT("\n"); + pr_cont("-%08x", dbg_last_ack_tsn); + pr_cont("\n"); /* FALL THROUGH... */ default: - SCTP_DEBUG_PRINTK("KEPT: %08x",tsn); + pr_debug("KEPT: %08x", tsn); + dbg_prt_state = 1; dbg_kept_tsn = tsn; } dbg_last_kept_tsn = tsn; -#endif /* SCTP_DEBUG */ +#endif /* CONFIG_SCTP_DBG_TSNS */ } } -#if SCTP_DEBUG +#ifdef CONFIG_SCTP_DBG_TSNS /* Finish off the last range, displaying its ending TSN. */ switch (dbg_prt_state) { case 0: - if (dbg_last_ack_tsn != dbg_ack_tsn) { - SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn); - } else { - SCTP_DEBUG_PRINTK_CONT("\n"); - } - break; - + if (dbg_last_ack_tsn != dbg_ack_tsn) + pr_cont("-%08x\n", dbg_last_ack_tsn); + else + pr_cont("\n"); + break; case 1: - if (dbg_last_kept_tsn != dbg_kept_tsn) { - SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn); - } else { - SCTP_DEBUG_PRINTK_CONT("\n"); - } + if (dbg_last_kept_tsn != dbg_kept_tsn) + pr_cont("-%08x\n", dbg_last_kept_tsn); + else + pr_cont("\n"); + break; } -#endif /* SCTP_DEBUG */ +#endif /* CONFIG_SCTP_DBG_TSNS */ if (transport) { if (bytes_acked) { struct sctp_association *asoc = transport->asoc; @@ -1676,9 +1659,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, !list_empty(&tlist) && (sack_ctsn+2 == q->asoc->next_tsn) && q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) { - SCTP_DEBUG_PRINTK("%s: SACK received for zero " - "window probe: %u\n", - __func__, sack_ctsn); + pr_debug("%s: sack received for zero window " + "probe:%u\n", __func__, sack_ctsn); + q->asoc->overall_error_count = 0; 
transport->error_count = 0; } @@ -1739,10 +1722,8 @@ static void sctp_mark_missing(struct sctp_outq *q, count_of_newacks, tsn)) { chunk->tsn_missing_report++; - SCTP_DEBUG_PRINTK( - "%s: TSN 0x%x missing counter: %d\n", - __func__, tsn, - chunk->tsn_missing_report); + pr_debug("%s: tsn:0x%x missing counter:%d\n", + __func__, tsn, chunk->tsn_missing_report); } } /* @@ -1762,11 +1743,10 @@ static void sctp_mark_missing(struct sctp_outq *q, if (do_fast_retransmit) sctp_retransmit(q, transport, SCTP_RTXR_FAST_RTX); - SCTP_DEBUG_PRINTK("%s: transport: %p, cwnd: %d, " - "ssthresh: %d, flight_size: %d, pba: %d\n", - __func__, transport, transport->cwnd, - transport->ssthresh, transport->flight_size, - transport->partial_bytes_acked); + pr_debug("%s: transport:%p, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, transport, + transport->cwnd, transport->ssthresh, + transport->flight_size, transport->partial_bytes_acked); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1de49c802d8..4a17494d736 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -451,8 +451,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, fl4->fl4_sport = saddr->v4.sin_port; } - SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", - __func__, &fl4->daddr, &fl4->saddr); + pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr, + &fl4->saddr); rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) @@ -513,10 +513,10 @@ out_unlock: out: t->dst = dst; if (dst) - SCTP_DEBUG_PRINTK("rt_dst:%pI4, rt_src:%pI4\n", - &fl4->daddr, &fl4->saddr); + pr_debug("rt_dst:%pI4, rt_src:%pI4\n", + &fl4->daddr, &fl4->saddr); else - SCTP_DEBUG_PRINTK("NO ROUTE\n"); + pr_debug("no route\n"); } /* For v4, the source address is cached in the route entry(dst). 
So no need @@ -604,9 +604,9 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) spin_lock_bh(&net->sctp.addr_wq_lock); list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { - SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", - " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state, - addrw); + pr_debug("%s: the first ent in wq:%p is addr:%pISc for cmd:%d at " + "entry:%p\n", __func__, &net->sctp.addr_waitq, &addrw->a.sa, + addrw->state, addrw); #if IS_ENABLED(CONFIG_IPV6) /* Now we send an ASCONF for each association */ @@ -623,8 +623,10 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) addrw->state == SCTP_ADDR_NEW) { unsigned long timeo_val; - SCTP_DEBUG_PRINTK("sctp_timo_handler: this is on DAD, trying %d sec later\n", - SCTP_ADDRESS_TICK_DELAY); + pr_debug("%s: this is on DAD, trying %d sec " + "later\n", __func__, + SCTP_ADDRESS_TICK_DELAY); + timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); mod_timer(&net->sctp.addr_wq_timer, timeo_val); @@ -641,7 +643,7 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) continue; sctp_bh_lock_sock(sk); if (sctp_asconf_mgmt(sp, addrw) < 0) - SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: sctp_asconf_mgmt failed\n"); + pr_debug("%s: sctp_asconf_mgmt failed\n", __func__); sctp_bh_unlock_sock(sk); } #if IS_ENABLED(CONFIG_IPV6) @@ -707,9 +709,10 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { if (addrw->state != cmd) { - SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", - " in wq %p\n", addrw->state, &addrw->a, - &net->sctp.addr_waitq); + pr_debug("%s: offsets existing entry for %d, addr:%pISc " + "in wq:%p\n", __func__, addrw->state, &addrw->a.sa, + &net->sctp.addr_waitq); + list_del(&addrw->list); kfree(addrw); } @@ -725,8 +728,9 @@ void sctp_addr_wq_mgmt(struct net *net, struct 
sctp_sockaddr_entry *addr, int cm } addrw->state = cmd; list_add_tail(&addrw->list, &net->sctp.addr_waitq); - SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", - " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq); + + pr_debug("%s: add new entry for cmd:%d, addr:%pISc in wq:%p\n", + __func__, addrw->state, &addrw->a.sa, &net->sctp.addr_waitq); if (!timer_pending(&net->sctp.addr_wq_timer)) { timeo_val = jiffies; @@ -952,15 +956,14 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, { struct inet_sock *inet = inet_sk(skb->sk); - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", - __func__, skb, skb->len, - &transport->fl.u.ip4.saddr, - &transport->fl.u.ip4.daddr); + pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, + skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr); inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); + return ip_queue_xmit(skb, &transport->fl); } @@ -1321,9 +1324,8 @@ static __init int sctp_init(void) int max_share; int order; - /* SCTP_DEBUG sanity check. */ - if (!sctp_sanity_check()) - goto out; + BUILD_BUG_ON(sizeof(struct sctp_ulpevent) > + sizeof(((struct sk_buff *) 0)->cb)); /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index dd71f1f9ba1..362ae6e2fd9 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -741,7 +741,8 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); - SCTP_DEBUG_PRINTK("sackCTSNAck sent: 0x%x.\n", ctsn); + + pr_debug("%s: sackCTSNAck sent:0x%x\n", __func__, ctsn); /* How much room is needed in the chunk? 
*/ num_gabs = sctp_tsnmap_num_gabs(map, gabs); @@ -1287,10 +1288,8 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, if (!retval) goto nodata; - - if (!sk) { - SCTP_DEBUG_PRINTK("chunkifying skb %p w/o an sk\n", skb); - } + if (!sk) + pr_debug("%s: chunkifying skb:%p w/o an sk\n", __func__, skb); INIT_LIST_HEAD(&retval->list); retval->skb = skb; @@ -2191,8 +2190,9 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; fallthrough: default: - SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n", - ntohs(param.p->type), cid); + pr_debug("%s: unrecognized param:%d for chunk:%d\n", + __func__, ntohs(param.p->type), cid); + retval = sctp_process_unk_param(asoc, param, chunk, err_chunk); break; } @@ -2516,7 +2516,7 @@ do_addr_param: break; case SCTP_PARAM_HOST_NAME_ADDRESS: - SCTP_DEBUG_PRINTK("unimplemented SCTP_HOST_NAME_ADDRESS\n"); + pr_debug("%s: unimplemented SCTP_HOST_NAME_ADDRESS\n", __func__); break; case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES: @@ -2662,8 +2662,8 @@ fall_through: * called prior to this routine. Simply log the error * here. 
*/ - SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n", - ntohs(param.p->type), asoc); + pr_debug("%s: ignoring param:%d for association:%p.\n", + __func__, ntohs(param.p->type), asoc); break; } @@ -2805,7 +2805,10 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, totallen += paramlen; totallen += addr_param_len; del_pickup = 1; - SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen); + + pr_debug("%s: picked same-scope del_pending addr, " + "totallen for all addresses is %d\n", + __func__, totallen); } } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index ff91f47b023..cf6f8451822 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -257,7 +257,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->T3_rtx_timer, jiffies + (HZ/20))) @@ -297,9 +297,8 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy: timer %d\n", - __func__, - timeout_type); + pr_debug("%s: sock is busy: timer %d\n", __func__, + timeout_type); /* Try again later. */ if (!mod_timer(&asoc->timers[timeout_type], jiffies + (HZ/20))) @@ -377,7 +376,7 @@ void sctp_generate_heartbeat_event(unsigned long data) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. 
*/ if (!mod_timer(&transport->hb_timer, jiffies + (HZ/20))) @@ -415,7 +414,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->proto_unreach_timer, @@ -521,11 +520,9 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, if (transport->state != SCTP_INACTIVE && (transport->error_count > transport->pathmaxrxt)) { - SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", - " transport IP: port:%d failed.\n", - asoc, - (&transport->ipaddr), - ntohs(transport->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p transport addr:%pISpc failed\n", + __func__, asoc, &transport->ipaddr.sa); + sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_DOWN, SCTP_FAILED_THRESHOLD); @@ -804,8 +801,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, asoc->state = state; - SCTP_DEBUG_PRINTK("sctp_cmd_new_state: asoc %p[%s]\n", - asoc, sctp_state_tbl[state]); + pr_debug("%s: asoc:%p[%s]\n", __func__, asoc, sctp_state_tbl[state]); if (sctp_style(sk, TCP)) { /* Change the sk->sk_state of a TCP-style socket that has @@ -1017,15 +1013,11 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, asoc->timeouts[timer] = asoc->max_init_timeo; } asoc->init_cycle++; - SCTP_DEBUG_PRINTK( - "T1 %s Timeout adjustment" - " init_err_counter: %d" - " cycle: %d" - " timeout: %ld\n", - name, - asoc->init_err_counter, - asoc->init_cycle, - asoc->timeouts[timer]); + + pr_debug("%s: T1[%s] timeout adjustment init_err_counter:%d" + " cycle:%d timeout:%ld\n", __func__, name, + asoc->init_err_counter, asoc->init_cycle, + asoc->timeouts[timer]); } } @@ -1080,23 +1072,19 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. 
*/ -#define DEBUG_PRE \ - SCTP_DEBUG_PRINTK("sctp_do_sm prefn: " \ - "ep %p, %s, %s, asoc %p[%s], %s\n", \ - ep, sctp_evttype_tbl[event_type], \ - (*debug_fn)(subtype), asoc, \ - sctp_state_tbl[state], state_fn->name) - -#define DEBUG_POST \ - SCTP_DEBUG_PRINTK("sctp_do_sm postfn: " \ - "asoc %p, status: %s\n", \ - asoc, sctp_status_tbl[status]) - -#define DEBUG_POST_SFX \ - SCTP_DEBUG_PRINTK("sctp_do_sm post sfx: error %d, asoc %p[%s]\n", \ - error, asoc, \ - sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \ - sctp_assoc2id(asoc)))?asoc->state:SCTP_STATE_CLOSED]) +#define debug_pre_sfn() \ + pr_debug("%s[pre-fn]: ep:%p, %s, %s, asoc:%p[%s], %s\n", __func__, \ + ep, sctp_evttype_tbl[event_type], (*debug_fn)(subtype), \ + asoc, sctp_state_tbl[state], state_fn->name) + +#define debug_post_sfn() \ + pr_debug("%s[post-fn]: asoc:%p, status:%s\n", __func__, asoc, \ + sctp_status_tbl[status]) + +#define debug_post_sfx() \ + pr_debug("%s[post-sfx]: error:%d, asoc:%p[%s]\n", __func__, error, \ + asoc, sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \ + sctp_assoc2id(asoc))) ? asoc->state : SCTP_STATE_CLOSED]) /* * This is the master state machine processing function. 
@@ -1116,7 +1104,6 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_disposition_t status; int error = 0; typedef const char *(printfn_t)(sctp_subtype_t); - static printfn_t *table[] = { NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname, }; @@ -1129,21 +1116,18 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_init_cmd_seq(&commands); - DEBUG_PRE; + debug_pre_sfn(); status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); - DEBUG_POST; + debug_post_sfn(); error = sctp_side_effects(event_type, subtype, state, ep, asoc, event_arg, status, &commands, gfp); - DEBUG_POST_SFX; + debug_post_sfx(); return error; } -#undef DEBUG_PRE -#undef DEBUG_POST - /***************************************************************** * This the master state function side effect processing function. *****************************************************************/ @@ -1172,9 +1156,9 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, switch (status) { case SCTP_DISPOSITION_DISCARD: - SCTP_DEBUG_PRINTK("Ignored sctp protocol event - state %d, " - "event_type %d, event_id %d\n", - state, event_type, subtype.chunk); + pr_debug("%s: ignored sctp protocol event - state:%d, " + "event_type:%d, event_id:%d\n", __func__, state, + event_type, subtype.chunk); break; case SCTP_DISPOSITION_NOMEM: @@ -1425,18 +1409,18 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_CHUNK_ULP: /* Send a chunk to the sockets layer. */ - SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "chunk_up:", cmd->obj.chunk, - "ulpq:", &asoc->ulpq); + pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n", + __func__, cmd->obj.chunk, &asoc->ulpq); + sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_EVENT_ULP: /* Send a notification to the sockets layer. 
*/ - SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "event_up:",cmd->obj.ulpevent, - "ulpq:",&asoc->ulpq); + pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n", + __func__, cmd->obj.ulpevent, &asoc->ulpq); + sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent); break; @@ -1601,7 +1585,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_REPORT_BAD_TAG: - SCTP_DEBUG_PRINTK("vtag mismatch!\n"); + pr_debug("%s: vtag mismatch!\n", __func__); break; case SCTP_CMD_STRIKE: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b3d18685651..f6b7109195a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1179,9 +1179,9 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, /* Check if the timestamp looks valid. */ if (time_after(hbinfo->sent_at, jiffies) || time_after(jiffies, hbinfo->sent_at + max_interval)) { - SCTP_DEBUG_PRINTK("%s: HEARTBEAT ACK with invalid timestamp " - "received for transport: %p\n", - __func__, link); + pr_debug("%s: HEARTBEAT ACK with invalid timestamp received " + "for transport:%p\n", __func__, link); + return SCTP_DISPOSITION_DISCARD; } @@ -2562,7 +2562,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, const struct sctp_association *asoc, struct sctp_transport *transport) { - SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); + pr_debug("%s: ABORT received (INIT)\n", __func__); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); @@ -2572,6 +2573,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, /* CMD_INIT_FAILED will DELETE_TCB. 
*/ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, SCTP_PERR(error)); + return SCTP_DISPOSITION_ABORT; } @@ -2637,8 +2639,9 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, ctsn = ntohl(sdh->cum_tsn_ack); if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -2721,8 +2724,9 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, ctsn = ntohl(sdh->cum_tsn_ack); if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -3174,8 +3178,9 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, * Point indicates an out-of-order SACK. */ if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -3859,7 +3864,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, skb_pull(chunk->skb, len); tsn = ntohl(fwdtsn_hdr->new_cum_tsn); - SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __func__, tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* The TSN is too high--silently discard the chunk and count on it * getting retransmitted later. @@ -3927,7 +3932,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( skb_pull(chunk->skb, len); tsn = ntohl(fwdtsn_hdr->new_cum_tsn); - SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __func__, tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* The TSN is too high--silently discard the chunk and count on it * getting retransmitted later. 
@@ -4166,7 +4171,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, struct sctp_chunk *err_chunk; sctp_chunkhdr_t *hdr; - SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk); + pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk); if (!sctp_vtag_verify(unk_chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -4256,7 +4261,8 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk); + pr_debug("%s: chunk:%d is discarded\n", __func__, type.chunk); + return SCTP_DISPOSITION_DISCARD; } @@ -5184,7 +5190,9 @@ sctp_disposition_t sctp_sf_ignore_primitive( void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("Primitive type %d is ignored.\n", type.primitive); + pr_debug("%s: primitive type:%d is ignored\n", __func__, + type.primitive); + return SCTP_DISPOSITION_DISCARD; } @@ -5379,7 +5387,9 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("The event other type %d is ignored\n", type.other); + pr_debug("%s: the event other type:%d is ignored\n", + __func__, type.other); + return SCTP_DISPOSITION_DISCARD; } @@ -5527,7 +5537,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; - SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); + pr_debug("%s: timer T1 expired (INIT)\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { @@ -5546,9 +5557,10 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); } else { - SCTP_DEBUG_PRINTK("Giving up on INIT, attempts: %d" - " max_init_attempts: %d\n", - attempts, asoc->max_init_attempts); + pr_debug("%s: giving up on INIT, attempts:%d " + 
"max_init_attempts:%d\n", __func__, attempts, + asoc->max_init_attempts); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, @@ -5588,7 +5600,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, struct sctp_chunk *repl = NULL; int attempts = asoc->init_err_counter + 1; - SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); + pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { @@ -5636,7 +5649,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net, { struct sctp_chunk *reply = NULL; - SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); + pr_debug("%s: timer T2 expired\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); ((struct sctp_association *)asoc)->shutdown_retries++; @@ -5777,7 +5791,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net, { struct sctp_chunk *reply = NULL; - SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); + pr_debug("%s: timer T5 expired\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); @@ -5892,7 +5907,8 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("Timer %d ignored.\n", type.chunk); + pr_debug("%s: timer %d ignored\n", __func__, type.chunk); + return SCTP_DISPOSITION_CONSUME; } @@ -6102,7 +6118,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); tsn = ntohl(data_hdr->tsn); - SCTP_DEBUG_PRINTK("eat_data: TSN 0x%x.\n", tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* ASSERT: Now skb->data is really the user data. 
*/ @@ -6179,12 +6195,12 @@ static int sctp_eat_data(const struct sctp_association *asoc, */ if (sctp_tsnmap_has_gap(map) && (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { - SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn); + pr_debug("%s: reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; } else { - SCTP_DEBUG_PRINTK("Discard tsn: %u len: %Zd, " - "rwnd: %d\n", tsn, datalen, - asoc->rwnd); + pr_debug("%s: discard tsn:%u len:%zu, rwnd:%d\n", + __func__, tsn, datalen, asoc->rwnd); + return SCTP_IERROR_IGNORE_TSN; } } @@ -6199,7 +6215,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, if (*sk->sk_prot_creator->memory_pressure) { if (sctp_tsnmap_has_gap(map) && (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { - SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn); + pr_debug("%s: under pressure, reneging for tsn:%u\n", + __func__, tsn); deliver = SCTP_CMD_RENEGE; } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 66fcdcfe1b7..d5c6a287047 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -281,8 +281,8 @@ static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) sctp_lock_sock(sk); - SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, addr: %p, addr_len: %d)\n", - sk, addr, addr_len); + pr_debug("%s: sk:%p, addr:%p, addr_len:%d\n", __func__, sk, + addr, addr_len); /* Disallow binding twice. */ if (!sctp_sk(sk)->ep->base.bind_addr.port) @@ -342,19 +342,15 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Common sockaddr verification. 
*/ af = sctp_sockaddr_af(sp, addr, len); if (!af) { - SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d) EINVAL\n", - sk, addr, len); + pr_debug("%s: sk:%p, newaddr:%p, len:%d EINVAL\n", + __func__, sk, addr, len); return -EINVAL; } snum = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK_IPADDR("sctp_do_bind(sk: %p, new addr: ", - ", port: %d, new port: %d, len: %d)\n", - sk, - addr, - bp->port, snum, - len); + pr_debug("%s: sk:%p, new addr:%pISc, port:%d, new port:%d, len:%d\n", + __func__, sk, &addr->sa, bp->port, snum, len); /* PF specific bind() address verification. */ if (!sp->pf->bind_verify(sp, addr)) @@ -368,9 +364,8 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (!snum) snum = bp->port; else if (snum != bp->port) { - SCTP_DEBUG_PRINTK("sctp_do_bind:" - " New port %d does not match existing port " - "%d.\n", snum, bp->port); + pr_debug("%s: new port %d doesn't match existing port " + "%d\n", __func__, snum, bp->port); return -EINVAL; } } @@ -468,8 +463,8 @@ static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) struct sockaddr *sa_addr; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_bindx_add (sk: %p, addrs: %p, addrcnt: %d)\n", - sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, + addrs, addrcnt); addr_buf = addrs; for (cnt = 0; cnt < addrcnt; cnt++) { @@ -535,11 +530,10 @@ static int sctp_send_asconf_add_ip(struct sock *sk, sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n", - __func__, sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); list_for_each_entry(asoc, &ep->asocs, asocs) { - if (!asoc->peer.asconf_capable) continue; @@ -646,8 +640,8 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) union sctp_addr *sa_addr; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_bindx_rem (sk: %p, addrs: %p, addrcnt: %d)\n", - sk, addrs, addrcnt); + 
pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); addr_buf = addrs; for (cnt = 0; cnt < addrcnt; cnt++) { @@ -740,8 +734,8 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n", - __func__, sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); list_for_each_entry(asoc, &ep->asocs, asocs) { @@ -808,9 +802,11 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sin6 = (struct sockaddr_in6 *)addrs; asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr; } - SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ", - " at %p\n", asoc, asoc->asconf_addr_del_pending, - asoc->asconf_addr_del_pending); + + pr_debug("%s: keep the last address asoc:%p %pISc at %p\n", + __func__, asoc, &asoc->asconf_addr_del_pending->sa, + asoc->asconf_addr_del_pending); + asoc->src_out_of_asoc_ok = 1; stored = 1; goto skip_mkasconf; @@ -972,8 +968,8 @@ static int sctp_setsockopt_bindx(struct sock* sk, void *addr_buf; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_setsockopt_bindx: sk %p addrs %p" - " addrs_size %d opt %d\n", sk, addrs, addrs_size, op); + pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n", + __func__, sk, addrs, addrs_size, op); if (unlikely(addrs_size <= 0)) return -EINVAL; @@ -1231,10 +1227,9 @@ static int __sctp_connect(struct sock* sk, asoc = NULL; out_free: + pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n", + __func__, asoc, kaddrs, err); - SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" - " kaddrs: %p err: %d\n", - asoc, kaddrs, err); if (asoc) { /* sctp_primitive_ASSOCIATE may have added this association * To the hash table, try to unhash it, just in case, its a noop @@ -1316,8 +1311,8 @@ static int __sctp_setsockopt_connectx(struct sock* sk, int err = 0; struct sockaddr *kaddrs; - SCTP_DEBUG_PRINTK("%s - sk %p addrs %p addrs_size 
%d\n", - __func__, sk, addrs, addrs_size); + pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", + __func__, sk, addrs, addrs_size); if (unlikely(addrs_size <= 0)) return -EINVAL; @@ -1468,7 +1463,7 @@ static void sctp_close(struct sock *sk, long timeout) struct list_head *pos, *temp; unsigned int data_was_unread; - SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout); + pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); sctp_lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; @@ -1594,14 +1589,12 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; - SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %zu)\n", - sk, msg, msg_len); - err = 0; sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("Using endpoint: %p.\n", ep); + pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk, + msg, msg_len, ep); /* We cannot send a message over a TCP-style listening socket. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) { @@ -1611,9 +1604,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Parse out the SCTP CMSGs. */ err = sctp_msghdr_parse(msg, &cmsgs); - if (err) { - SCTP_DEBUG_PRINTK("msghdr parse err = %x\n", err); + pr_debug("%s: msghdr parse err:%x\n", __func__, err); goto out_nounlock; } @@ -1645,8 +1637,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, associd = sinfo->sinfo_assoc_id; } - SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n", - msg_len, sinfo_flags); + pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__, + msg_len, sinfo_flags); /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. 
*/ if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) { @@ -1675,7 +1667,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, transport = NULL; - SCTP_DEBUG_PRINTK("About to look up association.\n"); + pr_debug("%s: about to look up association\n", __func__); sctp_lock_sock(sk); @@ -1705,7 +1697,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } if (asoc) { - SCTP_DEBUG_PRINTK("Just looked up association: %p.\n", asoc); + pr_debug("%s: just looked up association:%p\n", __func__, asoc); /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED * socket that has an association in CLOSED state. This can @@ -1718,8 +1710,9 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } if (sinfo_flags & SCTP_EOF) { - SCTP_DEBUG_PRINTK("Shutting down association: %p\n", - asoc); + pr_debug("%s: shutting down association:%p\n", + __func__, asoc); + sctp_primitive_SHUTDOWN(net, asoc, NULL); err = 0; goto out_unlock; @@ -1732,7 +1725,9 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } - SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); + pr_debug("%s: aborting association:%p\n", + __func__, asoc); + sctp_primitive_ABORT(net, asoc, chunk); err = 0; goto out_unlock; @@ -1741,7 +1736,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Do we need to create the association? */ if (!asoc) { - SCTP_DEBUG_PRINTK("There is no association yet.\n"); + pr_debug("%s: there is no association yet\n", __func__); if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) { err = -EINVAL; @@ -1840,7 +1835,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } /* ASSERT: we have a valid association at this point. 
*/ - SCTP_DEBUG_PRINTK("We have a valid association.\n"); + pr_debug("%s: we have a valid association\n", __func__); if (!sinfo) { /* If the user didn't specify SNDRCVINFO, make up one with @@ -1909,7 +1904,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) goto out_free; - SCTP_DEBUG_PRINTK("We associated primitively.\n"); + + pr_debug("%s: we associated primitively\n", __func__); } /* Break the message into multiple chunks of maximum size. */ @@ -1936,17 +1932,15 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, */ err = sctp_primitive_SEND(net, asoc, datamsg); /* Did the lower layer accept the chunk? */ - if (err) + if (err) { sctp_datamsg_free(datamsg); - else - sctp_datamsg_put(datamsg); + goto out_free; + } - SCTP_DEBUG_PRINTK("We sent primitively.\n"); + pr_debug("%s: we sent primitively\n", __func__); - if (err) - goto out_free; - else - err = msg_len; + sctp_datamsg_put(datamsg); + err = msg_len; /* If we are already past ASSOCIATE, the lower * layers are responsible for association cleanup. 
@@ -2041,10 +2035,9 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, int err = 0; int skb_len; - SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %zd, %s: %d, %s: " - "0x%x, %s: %p)\n", "sk", sk, "msghdr", msg, - "len", len, "knoblauch", noblock, - "flags", flags, "addr_len", addr_len); + pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, " + "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags, + addr_len); sctp_lock_sock(sk); @@ -3086,7 +3079,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva err = sctp_send_asconf(asoc, chunk); - SCTP_DEBUG_PRINTK("We set peer primary addr primitively.\n"); + pr_debug("%s: we set peer primary addr primitively\n", __func__); return err; } @@ -3561,8 +3554,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, { int retval = 0; - SCTP_DEBUG_PRINTK("sctp_setsockopt(sk: %p... optname: %d)\n", - sk, optname); + pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. The API draft @@ -3724,8 +3716,8 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr, sctp_lock_sock(sk); - SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d\n", - __func__, sk, addr, addr_len); + pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, + addr, addr_len); /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); @@ -3855,7 +3847,7 @@ static int sctp_init_sock(struct sock *sk) struct net *net = sock_net(sk); struct sctp_sock *sp; - SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); + pr_debug("%s: sk:%p\n", __func__, sk); sp = sctp_sk(sk); @@ -3990,7 +3982,7 @@ static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; - SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk); + pr_debug("%s: sk:%p\n", __func__, sk); /* Release our hold on the endpoint. 
*/ sp = sctp_sk(sk); @@ -4123,9 +4115,9 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, goto out; } - SCTP_DEBUG_PRINTK("sctp_getsockopt_sctp_status(%d): %d %d %d\n", - len, status.sstat_state, status.sstat_rwnd, - status.sstat_assoc_id); + pr_debug("%s: len:%d, state:%d, rwnd:%d, assoc_id:%d\n", + __func__, len, status.sstat_state, status.sstat_rwnd, + status.sstat_assoc_id); if (copy_to_user(optval, &status, len)) { retval = -EFAULT; @@ -4333,8 +4325,8 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval return PTR_ERR(newfile); } - SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n", - __func__, sk, newsock->sk, retval); + pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk, + retval); /* Return the fd mapped to the new socket. */ if (put_user(len, optlen)) { @@ -4467,7 +4459,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, trans = sctp_addr_id2transport(sk, &params.spp_address, params.spp_assoc_id); if (!trans) { - SCTP_DEBUG_PRINTK("Failed no transport\n"); + pr_debug("%s: failed no transport\n", __func__); return -EINVAL; } } @@ -4478,7 +4470,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { - SCTP_DEBUG_PRINTK("Failed no association\n"); + pr_debug("%s: failed no association\n", __func__); return -EINVAL; } @@ -5698,8 +5690,7 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, if (put_user(len, optlen)) return -EFAULT; - SCTP_DEBUG_PRINTK("sctp_getsockopt_assoc_stat(%d): %d\n", - len, sas.sas_assoc_id); + pr_debug("%s: len:%d, assoc_id:%d\n", __func__, len, sas.sas_assoc_id); if (copy_to_user(optval, &sas, len)) return -EFAULT; @@ -5713,8 +5704,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, int retval = 0; int len; - SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p... 
optname: %d)\n", - sk, optname); + pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. The API draft @@ -5894,7 +5884,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) snum = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum); + pr_debug("%s: begins, snum:%d\n", __func__, snum); + sctp_local_bh_disable(); if (snum == 0) { @@ -5960,7 +5951,8 @@ pp_found: int reuse = sk->sk_reuse; struct sock *sk2; - SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); + pr_debug("%s: found a possible match\n", __func__); + if (pp->fastreuse && sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING) goto success; @@ -5990,7 +5982,8 @@ pp_found: goto fail_unlock; } } - SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n"); + + pr_debug("%s: found a match\n", __func__); } pp_not_found: /* If there was a hash table miss, create a new port. */ @@ -6479,8 +6472,8 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, timeo = sock_rcvtimeo(sk, noblock); - SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n", - timeo, MAX_SCHEDULE_TIMEOUT); + pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo, + MAX_SCHEDULE_TIMEOUT); do { /* Again only user level code calls this function, @@ -6611,8 +6604,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, long current_timeo = *timeo_p; DEFINE_WAIT(wait); - SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%zu\n", - asoc, (long)(*timeo_p), msg_len); + pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, + *timeo_p, msg_len); /* Increment the association's refcnt. 
*/ sctp_association_hold(asoc); @@ -6718,8 +6711,7 @@ static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p) long current_timeo = *timeo_p; DEFINE_WAIT(wait); - SCTP_DEBUG_PRINTK("%s: asoc=%p, timeo=%ld\n", __func__, asoc, - (long)(*timeo_p)); + pr_debug("%s: asoc:%p, timeo:%ld\n", __func__, asoc, *timeo_p); /* Increment the association's refcnt. */ sctp_association_hold(asoc); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 5d3c71bbd19..bdbbc3fd7c1 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -176,7 +176,10 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head) */ static void sctp_transport_destroy(struct sctp_transport *transport) { - SCTP_ASSERT(transport->dead, "Transport is not dead", return); + if (unlikely(!transport->dead)) { + WARN(1, "Attempt to destroy undead transport %p!\n", transport); + return; + } call_rcu(&transport->rcu, sctp_transport_destroy_rcu); @@ -317,11 +320,9 @@ void sctp_transport_put(struct sctp_transport *transport) /* Update transport's RTO based on the newly calculated RTT. */ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) { - /* Check for valid transport. */ - SCTP_ASSERT(tp, "NULL transport", return); - - /* We should not be doing any RTO updates unless rto_pending is set. */ - SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); + if (unlikely(!tp->rto_pending)) + /* We should not be doing any RTO updates unless rto_pending is set. 
*/ + pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp); if (tp->rttvar || tp->srtt) { struct net *net = sock_net(tp->asoc->base.sk); @@ -377,9 +378,8 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) */ tp->rto_pending = 0; - SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d " - "rttvar: %d, rto: %ld\n", __func__, - tp, rtt, tp->srtt, tp->rttvar, tp->rto); + pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n", + __func__, tp, rtt, tp->srtt, tp->rttvar, tp->rto); } /* This routine updates the transport's cwnd and partial_bytes_acked @@ -433,12 +433,11 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd += pmtu; else cwnd += bytes_acked; - SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, " - "bytes_acked: %d, cwnd: %d, ssthresh: %d, " - "flight_size: %d, pba: %d\n", - __func__, - transport, bytes_acked, cwnd, - ssthresh, flight_size, pba); + + pr_debug("%s: slow start: transport:%p, bytes_acked:%d, " + "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n", + __func__, transport, bytes_acked, cwnd, ssthresh, + flight_size, pba); } else { /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, * upon each SACK arrival that advances the Cumulative TSN Ack @@ -459,12 +458,12 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd += pmtu; pba = ((cwnd < pba) ? 
(pba - cwnd) : 0); } - SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: " - "transport: %p, bytes_acked: %d, cwnd: %d, " - "ssthresh: %d, flight_size: %d, pba: %d\n", - __func__, - transport, bytes_acked, cwnd, - ssthresh, flight_size, pba); + + pr_debug("%s: congestion avoidance: transport:%p, " + "bytes_acked:%d, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, + transport, bytes_acked, cwnd, ssthresh, + flight_size, pba); } transport->cwnd = cwnd; @@ -558,10 +557,10 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, } transport->partial_bytes_acked = 0; - SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: " - "%d ssthresh: %d\n", __func__, - transport, reason, - transport->cwnd, transport->ssthresh); + + pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n", + __func__, transport, reason, transport->cwnd, + transport->ssthresh); } /* Apply Max.Burst limit to the congestion window: -- cgit v1.2.3 From 6c734fb8592f6768170e48e7102cb2f0a1bb9759 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Jun 2013 12:02:59 +0800 Subject: gre: fix a regression in ioctl When testing GRE tunnel, I got: # ip tunnel show get tunnel gre0 failed: Invalid argument get tunnel gre1 failed: Invalid argument This is a regression introduced by commit c54419321455631079c7d ("GRE: Refactor GRE tunneling code.") because previously we only check the parameters for SIOCADDTUNNEL and SIOCCHGTUNNEL, after that commit, the check is moved for all commands. So, just check for SIOCADDTUNNEL and SIOCCHGTUNNEL. After this patch I got: # ip tunnel show gre0: gre/ip remote any local any ttl inherit nopmtudisc gre1: gre/ip remote 192.168.122.101 local 192.168.122.45 ttl inherit Cc: Pravin B Shelar Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/ipv4/ip_gre.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c326e869993..1f6eab66f7c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -314,10 +314,11 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + return -EINVAL; } p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); -- cgit v1.2.3 From ab6c7a0a43c2eaafa57583822b619b22637b49c7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Jun 2013 13:00:57 +0800 Subject: vti: remove duplicated code to fix a memory leak vti module allocates dev->tstats twice: in vti_fb_tunnel_init() and in vti_tunnel_init(), this leads to a memory leak of dev->tstats. Just remove the duplicated operations in vti_fb_tunnel_init(). (candidate for -stable) Cc: Stephen Hemminger Cc: Saurabh Mohan Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv4/ip_vti.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c118f6b576b..17cc0ffa8c0 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -606,17 +606,10 @@ static int __net_init vti_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - iph->version = 4; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - dev_hold(dev); rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); return 0; -- cgit v1.2.3 From 8965779d2c0e6ab246c82a405236b1fb2adae6b2 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Sat, 29 Jun 2013 21:30:49 +0800 Subject: ipv6,mcast: always hold idev->lock before mca_lock dingtianhong reported the following deadlock detected by lockdep: ====================================================== [ INFO: possible circular locking dependency detected ] 3.4.24.05-0.1-default #1 Not tainted ------------------------------------------------------- ksoftirqd/0/3 is trying to acquire lock: (&ndev->lock){+.+...}, at: [] ipv6_get_lladdr+0x74/0x120 but task is already holding lock: (&mc->mca_lock){+.+...}, at: [] mld_send_report+0x40/0x150 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&mc->mca_lock){+.+...}: [] validate_chain+0x637/0x730 [] __lock_acquire+0x2f7/0x500 [] lock_acquire+0x114/0x150 [] rt_spin_lock+0x4a/0x60 [] igmp6_group_added+0x3b/0x120 [] ipv6_mc_up+0x38/0x60 [] ipv6_find_idev+0x3d/0x80 [] addrconf_notify+0x3d5/0x4b0 [] notifier_call_chain+0x3f/0x80 [] raw_notifier_call_chain+0x11/0x20 [] call_netdevice_notifiers+0x32/0x60 [] __dev_notify_flags+0x34/0x80 [] dev_change_flags+0x40/0x70 [] do_setlink+0x237/0x8a0 [] rtnl_newlink+0x3ec/0x600 [] rtnetlink_rcv_msg+0x160/0x310 [] netlink_rcv_skb+0x89/0xb0 [] rtnetlink_rcv+0x27/0x40 [] netlink_unicast+0x140/0x180 [] netlink_sendmsg+0x33e/0x380 [] sock_sendmsg+0x112/0x130 [] __sys_sendmsg+0x44e/0x460 [] sys_sendmsg+0x44/0x70 [] system_call_fastpath+0x16/0x1b -> #0 (&ndev->lock){+.+...}: [] check_prev_add+0x3de/0x440 [] validate_chain+0x637/0x730 [] __lock_acquire+0x2f7/0x500 [] lock_acquire+0x114/0x150 [] rt_read_lock+0x42/0x60 [] ipv6_get_lladdr+0x74/0x120 [] mld_newpack+0xb6/0x160 [] add_grhead+0xab/0xc0 [] add_grec+0x3ab/0x460 [] mld_send_report+0x5a/0x150 [] igmp6_timer_handler+0x4e/0xb0 [] call_timer_fn+0xca/0x1d0 [] run_timer_softirq+0x1df/0x2e0 [] handle_pending_softirqs+0xf7/0x1f0 [] __do_softirq_common+0x7b/0xf0 [] __thread_do_softirq+0x1af/0x210 [] run_ksoftirqd+0xe1/0x1f0 [] kthread+0xae/0xc0 [] kernel_thread_helper+0x4/0x10 actually we can just hold idev->lock before taking pmc->mca_lock, and avoid taking idev->lock again when iterating idev->addr_list, since the upper callers of mld_newpack() already take read_lock_bh(&idev->lock). Reported-by: dingtianhong Cc: dingtianhong Cc: Hideaki YOSHIFUJI Cc: David S. Miller Cc: Hannes Frederic Sowa Tested-by: Ding Tianhong Tested-by: Chen Weilong Signed-off-by: Cong Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 4 ++-- net/ipv6/mcast.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 12dd2fec045..75fd93bdd0d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1444,8 +1444,8 @@ try_nextdev: } EXPORT_SYMBOL(ipv6_dev_get_saddr); -static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, - unsigned char banned_flags) +int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, + unsigned char banned_flags) { struct inet6_ifaddr *ifp; int err = -EADDRNOTAVAIL; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 502c877cbf1..99cd65c715c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1351,8 +1351,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct net_device *dev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) { + struct net_device *dev = idev->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; @@ -1377,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_reserve(skb, hlen); - if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { + if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* : * use unspecified address as the source address * when a valid link-local address is not available. 
@@ -1474,7 +1475,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr; if (!skb) - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(pmc->idev, dev->mtu); if (!skb) return NULL; pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); @@ -1494,7 +1495,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) { - struct net_device *dev = pmc->idev->dev; + struct inet6_dev *idev = pmc->idev; + struct net_device *dev = idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; @@ -1523,7 +1525,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); } } first = 1; @@ -1550,7 +1552,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); first = 1; scount = 0; } @@ -1605,8 +1607,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) struct sk_buff *skb = NULL; int type; + read_lock_bh(&idev->lock); if (!pmc) { - read_lock_bh(&idev->lock); for (pmc=idev->mc_list; pmc; pmc=pmc->next) { if (pmc->mca_flags & MAF_NOREPORT) continue; @@ -1618,7 +1620,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); } else { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) @@ -1628,6 +1629,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); 
spin_unlock_bh(&pmc->mca_lock); } + read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } -- cgit v1.2.3 From e02010adeeb21ef56d6b9b68c785ed1ecc832aee Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 1 Jul 2013 11:31:36 +0200 Subject: net: sctp: get rid of SCTP_DBG_TSNS entirely After having reworked the debugging framework, Neil and Vlad agreed to get rid of the leftover SCTP_DBG_TSNS code for a couple of reasons: We can use systemtap scripts to investigate these things, we now have pr_debug() helpers that make life easier, and if we really need anything else besides those tools, we will be forced to come up with something better than we have there. Therefore, get rid of this ifdef debugging code entirely for now. Signed-off-by: Daniel Borkmann CC: Vlad Yasevich CC: Neil Horman Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/Kconfig | 9 ----- net/sctp/outqueue.c | 105 ---------------------------------------------------- 2 files changed, 114 deletions(-) (limited to 'net') diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index d80bf1aebae..71c1a598d9b 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -49,15 +49,6 @@ config NET_SCTPPROBE To compile this code as a module, choose M here: the module will be called sctp_probe. -config SCTP_DBG_TSNS - bool "SCTP: Debug transactions" - help - If you say Y, this will enable transaction debugging, visible - from the kernel's dynamic debugging framework. - - If unsure, say N. However, if you are running into problems, use - this option to gather outqueue trace information. 
- config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" depends on PROC_FS diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 511b3b35d60..cb80a8e060b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1335,21 +1335,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, int bytes_acked = 0; int migrate_bytes = 0; - /* These state variables are for coherent debug output. --xguo */ - -#ifdef CONFIG_SCTP_DBG_TSNS - __u32 dbg_ack_tsn = 0; /* An ACKed TSN range starts here... */ - __u32 dbg_last_ack_tsn = 0; /* ...and finishes here. */ - __u32 dbg_kept_tsn = 0; /* An un-ACKed range starts here... */ - __u32 dbg_last_kept_tsn = 0; /* ...and finishes here. */ - - /* 0 : The last TSN was ACKed. - * 1 : The last TSN was NOT ACKed (i.e. KEPT). - * -1: We need to initialize. - */ - int dbg_prt_state = -1; -#endif /* CONFIG_SCTP_DBG_TSNS */ - sack_ctsn = ntohl(sack->cum_tsn_ack); INIT_LIST_HEAD(&tlist); @@ -1471,49 +1456,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ list_add_tail(lchunk, &tlist); } - -#ifdef CONFIG_SCTP_DBG_TSNS - switch (dbg_prt_state) { - case 0: /* last TSN was ACKed */ - if (dbg_last_ack_tsn + 1 == tsn) { - /* This TSN belongs to the - * current ACK range. - */ - break; - } - - if (dbg_last_ack_tsn != dbg_ack_tsn) { - /* Display the end of the - * current range. - */ - pr_cont("-%08x", dbg_last_ack_tsn); - } - - /* Start a new range. */ - pr_cont(",%08x", tsn); - dbg_ack_tsn = tsn; - break; - - case 1: /* The last TSN was NOT ACKed. */ - if (dbg_last_kept_tsn != dbg_kept_tsn) { - /* Display the end of current range. */ - pr_cont("-%08x", dbg_last_kept_tsn); - } - - pr_cont("\n"); - /* FALL THROUGH... */ - default: - /* This is the first-ever TSN we examined. */ - /* Start a new range of ACK-ed TSNs. 
*/ - pr_debug("ACKed: %08x", tsn); - - dbg_prt_state = 0; - dbg_ack_tsn = tsn; - } - - dbg_last_ack_tsn = tsn; -#endif /* CONFIG_SCTP_DBG_TSNS */ - } else { if (tchunk->tsn_gap_acked) { pr_debug("%s: receiver reneged on data TSN:0x%x\n", @@ -1537,56 +1479,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, } list_add_tail(lchunk, &tlist); - -#ifdef CONFIG_SCTP_DBG_TSNS - /* See the above comments on ACK-ed TSNs. */ - switch (dbg_prt_state) { - case 1: - if (dbg_last_kept_tsn + 1 == tsn) - break; - - if (dbg_last_kept_tsn != dbg_kept_tsn) - pr_cont("-%08x", dbg_last_kept_tsn); - - pr_cont(",%08x", tsn); - dbg_kept_tsn = tsn; - break; - - case 0: - if (dbg_last_ack_tsn != dbg_ack_tsn) - pr_cont("-%08x", dbg_last_ack_tsn); - - pr_cont("\n"); - /* FALL THROUGH... */ - default: - pr_debug("KEPT: %08x", tsn); - - dbg_prt_state = 1; - dbg_kept_tsn = tsn; - } - - dbg_last_kept_tsn = tsn; -#endif /* CONFIG_SCTP_DBG_TSNS */ } } -#ifdef CONFIG_SCTP_DBG_TSNS - /* Finish off the last range, displaying its ending TSN. */ - switch (dbg_prt_state) { - case 0: - if (dbg_last_ack_tsn != dbg_ack_tsn) - pr_cont("-%08x\n", dbg_last_ack_tsn); - else - pr_cont("\n"); - break; - case 1: - if (dbg_last_kept_tsn != dbg_kept_tsn) - pr_cont("-%08x\n", dbg_last_kept_tsn); - else - pr_cont("\n"); - break; - } -#endif /* CONFIG_SCTP_DBG_TSNS */ if (transport) { if (bytes_acked) { struct sctp_association *asoc = transport->asoc; -- cgit v1.2.3 From c590b5e2f05b5e98e614382582b7ae4cddb37599 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 1 Jul 2013 17:23:30 +0200 Subject: ethtool: make .get_dump_data() harder to misuse by drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the patch "bnx2x: remove zeroing of dump data buffer" showed, it is too easy implement .get_dump_data incorrectly in a driver. Let's make sure drivers cannot get confused by userspace requesting a too big dump. 
Also WARN if the driver sets dump->len to something weird and make sure the length reported to userspace is the actual length of data copied to userspace. Signed-off-by: Michal Schmidt Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9255bbdf81f..ab5fa6336c8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1320,10 +1320,19 @@ static int ethtool_get_dump_data(struct net_device *dev, if (ret) return ret; - len = (tmp.len > dump.len) ? dump.len : tmp.len; + len = min(tmp.len, dump.len); if (!len) return -EFAULT; + /* Don't ever let the driver think there's more space available + * than it requested with .get_dump_flag(). + */ + dump.len = len; + + /* Always allocate enough space to hold the whole thing so that the + * driver does not need to check the length and bother with partial + * dumping. + */ data = vzalloc(tmp.len); if (!data) return -ENOMEM; @@ -1331,6 +1340,16 @@ static int ethtool_get_dump_data(struct net_device *dev, if (ret) goto out; + /* There are two sane possibilities: + * 1. The driver's .get_dump_data() does not touch dump.len. + * 2. Or it may set dump.len to how much it really writes, which + * should be tmp.len (or len if it can do a partial dump). + * In any case respond to userspace with the actual length of data + * it's receiving. + */ + WARN_ON(dump.len != len && dump.len != tmp.len); + dump.len = len; + if (copy_to_user(useraddr, &dump, sizeof(dump))) { ret = -EFAULT; goto out; -- cgit v1.2.3 From e1558a93b61962710733dc8c11a2bc765607f1cd Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 2 Jul 2013 09:02:07 +0800 Subject: l2tp: add missing .owner to struct pppox_proto Add missing .owner of struct pppox_proto. This prevents the module from being removed from underneath its users. Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 8dec6876dc5..5ebee2ded9e 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1793,7 +1793,8 @@ static const struct proto_ops pppol2tp_ops = { static const struct pppox_proto pppol2tp_proto = { .create = pppol2tp_create, - .ioctl = pppol2tp_ioctl + .ioctl = pppol2tp_ioctl, + .owner = THIS_MODULE, }; #ifdef CONFIG_L2TP_V3 -- cgit v1.2.3 From 3b7b514f44bff05d26a6499c4d4fac2a83938e6e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 2 Jul 2013 14:49:34 +0800 Subject: ipip: fix a regression in ioctl This is a regression introduced by commit fd58156e456d9f68fe0448 (IPIP: Use ip-tunneling code.) Similar to GRE tunnel, previously we only check the parameters for SIOCADDTUNNEL and SIOCCHGTUNNEL, after that commit, the check is moved for all commands. So, just check for SIOCADDTUNNEL and SIOCCHGTUNNEL. Also, the check for i_key, o_key etc. is suspicious too, which did not exist before, reset them before passing to ip_tunnel_ioctl(). Cc: Pravin B Shelar Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/ipv4/ipip.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e6905fbda2a..51fc2a1dcdd 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -244,11 +244,13 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - return -EINVAL; - if (p.i_key || p.o_key || p.i_flags || p.o_flags) - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + } + + p.i_key = p.o_key = p.i_flags = p.o_flags = 0; if (p.iph.ttl) p.iph.frag_off |= htons(IP_DF); -- cgit v1.2.3 From 8822b64a0fa64a5dd1dfcf837c5b0be83f8c05d1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 1 Jul 2013 20:21:30 +0200 Subject: ipv6: call udp_push_pending_frames when uncorking a socket with AF_INET pending data We accidentally call down to ip6_push_pending_frames when uncorking pending AF_INET data on a ipv6 socket. This results in the following splat (from Dave Jones): skbuff: skb_under_panic: text:ffffffff816765f6 len:48 put:40 head:ffff88013deb6df0 data:ffff88013deb6dec tail:0x2c end:0xc0 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:126! 
invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: dccp_ipv4 dccp 8021q garp bridge stp dlci mpoa snd_seq_dummy sctp fuse hidp tun bnep nfnetlink scsi_transport_iscsi rfcomm can_raw can_bcm af_802154 appletalk caif_socket can caif ipt_ULOG x25 rose af_key pppoe pppox ipx phonet irda llc2 ppp_generic slhc p8023 psnap p8022 llc crc_ccitt atm bluetooth +netrom ax25 nfc rfkill rds af_rxrpc coretemp hwmon kvm_intel kvm crc32c_intel snd_hda_codec_realtek ghash_clmulni_intel microcode pcspkr snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep usb_debug snd_seq snd_seq_device snd_pcm e1000e snd_page_alloc snd_timer ptp snd pps_core soundcore xfs libcrc32c CPU: 2 PID: 8095 Comm: trinity-child2 Not tainted 3.10.0-rc7+ #37 task: ffff8801f52c2520 ti: ffff8801e6430000 task.ti: ffff8801e6430000 RIP: 0010:[] [] skb_panic+0x63/0x65 RSP: 0018:ffff8801e6431de8 EFLAGS: 00010282 RAX: 0000000000000086 RBX: ffff8802353d3cc0 RCX: 0000000000000006 RDX: 0000000000003b90 RSI: ffff8801f52c2ca0 RDI: ffff8801f52c2520 RBP: ffff8801e6431e08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff88022ea0c800 R13: ffff88022ea0cdf8 R14: ffff8802353ecb40 R15: ffffffff81cc7800 FS: 00007f5720a10740(0000) GS:ffff880244c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000005862000 CR3: 000000022843c000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 Stack: ffff88013deb6dec 000000000000002c 00000000000000c0 ffffffff81a3f6e4 ffff8801e6431e18 ffffffff8159a9aa ffff8801e6431e90 ffffffff816765f6 ffffffff810b756b 0000000700000002 ffff8801e6431e40 0000fea9292aa8c0 Call Trace: [] skb_push+0x3a/0x40 [] ip6_push_pending_frames+0x1f6/0x4d0 [] ? mark_held_locks+0xbb/0x140 [] udp_v6_push_pending_frames+0x2b9/0x3d0 [] ? udplite_getfrag+0x20/0x20 [] udp_lib_setsockopt+0x1aa/0x1f0 [] ? 
fget_light+0x387/0x4f0 [] udpv6_setsockopt+0x34/0x40 [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xd0 [] tracesys+0xdd/0xe2 Code: 00 00 48 89 44 24 10 8b 87 d8 00 00 00 48 89 44 24 08 48 8b 87 e8 00 00 00 48 c7 c7 c0 04 aa 81 48 89 04 24 31 c0 e8 e1 7e ff ff <0f> 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 RIP [] skb_panic+0x63/0x65 RSP This patch adds a check if the pending data is of address family AF_INET and directly calls udp_push_pending_frames from udp_v6_push_pending_frames if that is the case. This bug was found by Dave Jones with trinity. (Also move the initialization of fl6 below the AF_INET check, even if not strictly necessary.) Cc: Dave Jones Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 959502afd8d..6b270e53c20 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -800,7 +800,7 @@ send: /* * Push out all pending data as one UDP datagram. Socket is locked. 
*/ -static int udp_push_pending_frames(struct sock *sk) +int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -819,6 +819,7 @@ out: up->pending = 0; return err; } +EXPORT_SYMBOL(udp_push_pending_frames); int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f77e34c5a0e..b6f31437a1f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -959,11 +959,16 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + fl6 = &inet->cork.fl.u.ip6; + /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) goto out; -- cgit v1.2.3 From 75a493e60ac4bbe2e977e7129d6d8cbb0dd236be Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 2 Jul 2013 08:04:05 +0200 Subject: ipv6: ip6_append_data_mtu did not care about pmtudisc and frag_size If the socket had an IPV6_MTU value set, ip6_append_data_mtu lost track of this when appending the second frame on a corked socket. This results in the following splat: [37598.993962] ------------[ cut here ]------------ [37598.994008] kernel BUG at net/core/skbuff.c:2064! 
[37598.994008] invalid opcode: 0000 [#1] SMP [37598.994008] Modules linked in: tcp_lp uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_core videodev media vfat fat usb_storage fuse ebtable_nat xt_CHECKSUM bridge stp llc ipt_MASQUERADE nf_conntrack_netbios_ns nf_conntrack_broadcast ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat +nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i cxgb3 mdio libcxgbi ib_iser rdma_cm ib_addr iw_cm ib_cm ib_sa ib_mad ib_core iscsi_tcp libiscsi_tcp libiscsi +scsi_transport_iscsi rfcomm bnep iTCO_wdt iTCO_vendor_support snd_hda_codec_conexant arc4 iwldvm mac80211 snd_hda_intel acpi_cpufreq mperf coretemp snd_hda_codec microcode cdc_wdm cdc_acm [37598.994008] snd_hwdep cdc_ether snd_seq snd_seq_device usbnet mii joydev btusb snd_pcm bluetooth i2c_i801 e1000e lpc_ich mfd_core ptp iwlwifi pps_core snd_page_alloc mei cfg80211 snd_timer thinkpad_acpi snd tpm_tis soundcore rfkill tpm tpm_bios vhost_net tun macvtap macvlan kvm_intel kvm uinput binfmt_misc +dm_crypt i915 i2c_algo_bit drm_kms_helper drm i2c_core wmi video [37598.994008] CPU 0 [37598.994008] Pid: 27320, comm: t2 Not tainted 3.9.6-200.fc18.x86_64 #1 LENOVO 27744PG/27744PG [37598.994008] RIP: 0010:[] [] skb_copy_and_csum_bits+0x325/0x330 [37598.994008] RSP: 0018:ffff88003670da18 EFLAGS: 00010202 [37598.994008] RAX: ffff88018105c018 RBX: 0000000000000004 RCX: 00000000000006c0 [37598.994008] RDX: ffff88018105a6c0 RSI: ffff88018105a000 RDI: ffff8801e1b0aa00 [37598.994008] RBP: ffff88003670da78 R08: 0000000000000000 R09: ffff88018105c040 [37598.994008] R10: ffff8801e1b0aa00 R11: 0000000000000000 R12: 000000000000fff8 [37598.994008] R13: 00000000000004fc R14: 00000000ffff0504 R15: 0000000000000000 [37598.994008] FS: 00007f28eea59740(0000) GS:ffff88023bc00000(0000) knlGS:0000000000000000 [37598.994008] CS: 
0010 DS: 0000 ES: 0000 CR0: 000000008005003b [37598.994008] CR2: 0000003d935789e0 CR3: 00000000365cb000 CR4: 00000000000407f0 [37598.994008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [37598.994008] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [37598.994008] Process t2 (pid: 27320, threadinfo ffff88003670c000, task ffff88022c162ee0) [37598.994008] Stack: [37598.994008] ffff88022e098a00 ffff88020f973fc0 0000000000000008 00000000000004c8 [37598.994008] ffff88020f973fc0 00000000000004c4 ffff88003670da78 ffff8801e1b0a200 [37598.994008] 0000000000000018 00000000000004c8 ffff88020f973fc0 00000000000004c4 [37598.994008] Call Trace: [37598.994008] [] ip6_append_data+0xccf/0xfe0 [37598.994008] [] ? ip_copy_metadata+0x1a0/0x1a0 [37598.994008] [] ? _raw_spin_lock_bh+0x16/0x40 [37598.994008] [] udpv6_sendmsg+0x1ed/0xc10 [37598.994008] [] ? sock_has_perm+0x75/0x90 [37598.994008] [] inet_sendmsg+0x63/0xb0 [37598.994008] [] ? selinux_socket_sendmsg+0x23/0x30 [37598.994008] [] sock_sendmsg+0xb0/0xe0 [37598.994008] [] ? __switch_to+0x181/0x4a0 [37598.994008] [] sys_sendto+0x12d/0x180 [37598.994008] [] ? __audit_syscall_entry+0x94/0xf0 [37598.994008] [] ? syscall_trace_enter+0x231/0x240 [37598.994008] [] tracesys+0xdd/0xe2 [37598.994008] Code: fe 07 00 00 48 c7 c7 04 28 a6 81 89 45 a0 4c 89 4d b8 44 89 5d a8 e8 1b ac b1 ff 44 8b 5d a8 4c 8b 4d b8 8b 45 a0 e9 cf fe ff ff <0f> 0b 66 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 48 89 e5 48 [37598.994008] RIP [] skb_copy_and_csum_bits+0x325/0x330 [37598.994008] RSP [37599.007323] ---[ end trace d69f6a17f8ac8eee ]--- While there, also check if path mtu discovery is activated for this socket. The logic was adapted from ip6_append_data when first writing on the corked socket. This bug was introduced with commit 0c1833797a5a6ec23ea9261d979aa18078720b74 ("ipv6: fix incorrect ipsec fragment"). v2: a) Replace IPV6_PMTU_DISC_DO with IPV6_PMTUDISC_PROBE. 
b) Don't pass ipv6_pinfo to ip6_append_data_mtu (suggestion by Gao feng, thanks!). c) Change mtu to unsigned int, else we get a warning about non-matching types because of the min()-macro type-check. Acked-by: Gao feng Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index dae1949019d..be7589ef5cf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1093,11 +1093,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } -static void ip6_append_data_mtu(int *mtu, +static void ip6_append_data_mtu(unsigned int *mtu, int *maxfraglen, unsigned int fragheaderlen, struct sk_buff *skb, - struct rt6_info *rt) + struct rt6_info *rt, + bool pmtuprobe) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { @@ -1109,7 +1110,9 @@ static void ip6_append_data_mtu(int *mtu, * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = dst_mtu(rt->dst.path); + *mtu = min(*mtu, pmtuprobe ? 
+ rt->dst.dev->mtu : + dst_mtu(rt->dst.path)); } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1126,11 +1129,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, mtu; int exthdrlen; int dst_exthdrlen; int hh_len; - int mtu; int copy; int err; int offset = 0; @@ -1287,7 +1289,9 @@ alloc_new_skb: /* update mtu and maxfraglen if necessary */ if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, - fragheaderlen, skb, rt); + fragheaderlen, skb, rt, + np->pmtudisc == + IPV6_PMTUDISC_PROBE); skb_prev = skb; -- cgit v1.2.3 From 06a23fe31ca3992863721f21bdb0307af93da807 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 2 Jul 2013 20:30:10 +0900 Subject: core/dev: set pkt_type after eth_type_trans() in dev_forward_skb() The dev_forward_skb() assignment of pkt_type should be done after the call to eth_type_trans(). ip-encapsulated packets can be handled by localhost. But skb->pkt_type can be PACKET_OTHERHOST when packet comes via veth into ip tunnel device. In that case, the packet is dropped by ip_rcv(). Although this example uses gretap. l2tp-eth also has same issue. For l2tp-eth case, add dummy device for ip address and ip l2tp command. 
netns A | root netns | netns B veth<->veth=bridge=gretap <-loop back-> gretap=bridge=veth<->veth arp packet -> pkt_type BROADCAST------------>ip_rcv()------------------------> <- arp reply pkt_type ip_rcv()<-----------------OTHERHOST drop sample operations ip link add tapa type gretap remote 172.17.107.4 local 172.17.107.3 ip link add tapb type gretap remote 172.17.107.3 local 172.17.107.4 ip link set tapa up ip link set tapb up ip address add 172.17.107.3 dev tapa ip address add 172.17.107.4 dev tapb ip route get 172.17.107.3 > local 172.17.107.3 dev lo src 172.17.107.3 > cache ip route get 172.17.107.4 > local 172.17.107.4 dev lo src 172.17.107.4 > cache ip link add vetha type veth peer name vetha-peer ip link add vethb type veth peer name vethb-peer brctl addbr bra brctl addbr brb brctl addif bra tapa brctl addif bra vetha-peer brctl addif brb tapb brctl addif brb vethb-peer brctl show > bridge name bridge id STP enabled interfaces > bra 8000.6ea21e758ff1 no tapa > vetha-peer > brb 8000.420020eb92d5 no tapb > vethb-peer ip link set vetha-peer up ip link set vethb-peer up ip link set bra up ip link set brb up ip netns add a ip netns add b ip link set vetha netns a ip link set vethb netns b ip netns exec a ip address add 10.0.0.3/24 dev vetha ip netns exec b ip address add 10.0.0.4/24 dev vethb ip netns exec a ip link set vetha up ip netns exec b ip link set vethb up ip netns exec a arping -I vetha 10.0.0.4 ARPING 10.0.0.4 from 10.0.0.3 vetha ^CSent 2 probes (2 broadcast(s)) Received 0 response(s) Cc: Jason Wang Cc: "Michael S. Tsirkin" Cc: Eric Dumazet Cc: Patrick McHardy Cc: Hong Zhiguo Cc: Rami Rosen Cc: Tom Parkin Cc: Cong Wang Cc: Pravin B Shelar Cc: Jesse Gross Cc: dev@openvswitch.org Signed-off-by: Isaku Yamahata Signed-off-by: David S. 
Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 370354a9c5f..6a93cd8cd26 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1659,6 +1659,12 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_scrub_packet(skb); skb->protocol = eth_type_trans(skb, dev); + + /* eth_type_trans() can set pkt_type. + * clear pkt_type _after_ calling eth_type_trans() + */ + skb->pkt_type = PACKET_HOST; + return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); -- cgit v1.2.3 From 8a59bd3e9b296b93b905b5509c4ff540ee0e00bf Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Tue, 2 Jul 2013 18:39:36 +0200 Subject: sctp: use get_unused_fd_flags(0) instead of get_unused_fd() Macro get_unused_fd() is used to allocate a file descriptor with default flags. Those default flags (0) can be "unsafe": O_CLOEXEC must be used by default to not leak file descriptor across exec(). Instead of macro get_unused_fd(), functions anon_inode_getfd() or get_unused_fd_flags() should be used with flags given by userspace. If not possible, flags should be set to O_CLOEXEC to provide userspace with a default safe behavior. In a further patch, get_unused_fd() will be removed so that new code starts using anon_inode_getfd() or get_unused_fd_flags() with correct flags. This patch replaces calls to get_unused_fd() with equivalent call to get_unused_fd_flags(0) to preserve current behavior for existing code. The hard coded flag value (0) should be reviewed on a per-subsystem basis, and, if possible, set to O_CLOEXEC. Signed-off-by: Yann Droneaud Acked-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d5c6a287047..c6670d2e3f8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4312,7 +4312,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval goto out; /* Map the socket to an unused fd that can be returned to the user. */ - retval = get_unused_fd(); + retval = get_unused_fd_flags(0); if (retval < 0) { sock_release(newsock); goto out; -- cgit v1.2.3 From b6dc01a43aaca24e6e6928e24d9b37ba599f1e3c Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 2 Jul 2013 20:28:58 +0100 Subject: l2tp: do data sequence number handling in a separate func This change moves some code handling data sequence numbers into a separate function to avoid too much indentation. This is to prepare for some changes to data sequence number handling in subsequent patches. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 54 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6984c3a353c..5ca29659171 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -542,6 +542,38 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return __skb_checksum_complete(skb); } +/* If packet has sequence numbers, queue it if acceptable. Returns 0 if + * acceptable, else non-zero. + */ +static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) +{ + if (session->reorder_timeout != 0) { + /* Packet reordering enabled. Add skb to session's + * reorder queue, in order of ns. + */ + l2tp_recv_queue_skb(session, skb); + } else { + /* Packet reordering disabled. 
Discard out-of-sequence + * packets + */ + if (L2TP_SKB_CB(skb)->ns != session->nr) { + atomic_long_inc(&session->stats.rx_seq_discards); + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + goto discard; + } + skb_queue_tail(&session->reorder_q, skb); + } + + return 0; + +discard: + return 1; +} + /* Do receive processing of L2TP data frames. We handle both L2TPv2 * and L2TPv3 data frames here. * @@ -757,26 +789,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * enabled. Saved L2TP protocol info is stored in skb->sb[]. */ if (L2TP_SKB_CB(skb)->has_seq) { - if (session->reorder_timeout != 0) { - /* Packet reordering enabled. Add skb to session's - * reorder queue, in order of ns. - */ - l2tp_recv_queue_skb(session, skb); - } else { - /* Packet reordering disabled. Discard out-of-sequence - * packets - */ - if (L2TP_SKB_CB(skb)->ns != session->nr) { - atomic_long_inc(&session->stats.rx_seq_discards); - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", - session->name, L2TP_SKB_CB(skb)->ns, - L2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); - goto discard; - } - skb_queue_tail(&session->reorder_q, skb); - } + if (l2tp_recv_data_seq(session, skb)) + goto discard; } else { /* No sequence numbers. Add the skb to the tail of the * reorder queue. This ensures that it will be -- cgit v1.2.3 From 8a1631d588a39e826f4248e60310498d5266c6fa Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 2 Jul 2013 20:28:59 +0100 Subject: l2tp: make datapath sequence number support RFC-compliant The L2TP datapath is not currently RFC-compliant when sequence numbers are used in L2TP data packets. 
According to the L2TP RFC, any received sequence number NR greater than or equal to the next expected NR is acceptable, where the "greater than or equal to" test is determined by the NR wrap point. This differs for L2TPv2 and L2TPv3, so add state in the session context to hold the max NR value and the NR window size in order to do the acceptable sequence number value check. These might be configurable later, but for now we derive it from the tunnel L2TP version, which determines the sequence number field size. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 36 +++++++++++++++++++++++++++++++----- net/l2tp/l2tp_core.h | 2 ++ 2 files changed, 33 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 5ca29659171..735cc06971e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -414,10 +414,7 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ session->nr++; - if (tunnel->version == L2TP_HDR_VER_2) - session->nr &= 0xffff; - else - session->nr &= 0xffffff; + session->nr &= session->nr_max; l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated nr to %hu\n", session->name, session->nr); @@ -542,11 +539,34 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return __skb_checksum_complete(skb); } +static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr) +{ + u32 nws; + + if (nr >= session->nr) + nws = nr - session->nr; + else + nws = (session->nr_max + 1) - (session->nr - nr); + + return nws < session->nr_window_size; +} + /* If packet has sequence numbers, queue it if acceptable. Returns 0 if * acceptable, else non-zero. */ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) { + if (!l2tp_seq_check_rx_window(session, L2TP_SKB_CB(skb)->ns)) { + /* Packet sequence number is outside allowed window. + * Discard it. 
+ */ + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: pkt %u len %d discarded, outside window, nr=%u\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr); + goto discard; + } + if (session->reorder_timeout != 0) { /* Packet reordering enabled. Add skb to session's * reorder queue, in order of ns. @@ -556,7 +576,8 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) /* Packet reordering disabled. Discard out-of-sequence * packets */ - if (L2TP_SKB_CB(skb)->ns != session->nr) { + if ((L2TP_SKB_CB(skb)->ns != session->nr) && + (!session->reorder_skip)) { atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", @@ -1826,6 +1847,11 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->session_id = session_id; session->peer_session_id = peer_session_id; session->nr = 0; + if (tunnel->version == L2TP_HDR_VER_2) + session->nr_max = 0xffff; + else + session->nr_max = 0xffffff; + session->nr_window_size = session->nr_max / 2; sprintf(&session->name[0], "sess %u/%u", tunnel->tunnel_id, session->session_id); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 485a490fd99..4b9a3b72442 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -102,6 +102,8 @@ struct l2tp_session { u32 nr; /* session NR state (receive) */ u32 ns; /* session NR state (send) */ struct sk_buff_head reorder_q; /* receive reorder queue */ + u32 nr_max; /* max NR. 
Depends on tunnel */ + u32 nr_window_size; /* NR window size */ struct hlist_node hlist; /* Hash list node */ atomic_t ref_count; -- cgit v1.2.3 From a0dbd822273ce7660bf35525d61d7a8ac5e679a3 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Tue, 2 Jul 2013 20:29:00 +0100 Subject: l2tp: make datapath resilient to packet loss when sequence numbers enabled If L2TP data sequence numbers are enabled and reordering is not enabled, data reception stops if a packet is lost since the kernel waits for a sequence number that is never resent. (When reordering is enabled, data reception restarts when the reorder timeout expires.) If no reorder timeout is set, we should count the number of in-sequence packets after the out-of-sequence (OOS) condition is detected, and reset sequence number state after a number of such packets are received. For now, the number of in-sequence packets while in OOS state which cause the sequence number state to be reset is hard-coded to 5. This could be configurable later. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 36 +++++++++++++++++++++++++++++++----- net/l2tp/l2tp_core.h | 3 +++ 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 735cc06971e..feae495a0a3 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -572,12 +572,33 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) * reorder queue, in order of ns. */ l2tp_recv_queue_skb(session, skb); + goto out; + } + + /* Packet reordering disabled. Discard out-of-sequence packets, while + * tracking the number if in-sequence packets after the first OOS packet + * is seen. After nr_oos_count_max in-sequence packets, reset the + * sequence number to re-enable packet reception. + */ + if (L2TP_SKB_CB(skb)->ns == session->nr) { + skb_queue_tail(&session->reorder_q, skb); } else { - /* Packet reordering disabled. 
Discard out-of-sequence - * packets - */ - if ((L2TP_SKB_CB(skb)->ns != session->nr) && - (!session->reorder_skip)) { + u32 nr_oos = L2TP_SKB_CB(skb)->ns; + u32 nr_next = (session->nr_oos + 1) & session->nr_max; + + if (nr_oos == nr_next) + session->nr_oos_count++; + else + session->nr_oos_count = 0; + + session->nr_oos = nr_oos; + if (session->nr_oos_count > session->nr_oos_count_max) { + session->reorder_skip = 1; + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: %d oos packets received. Resetting sequence numbers\n", + session->name, session->nr_oos_count); + } + if (!session->reorder_skip) { atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", @@ -589,6 +610,7 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) skb_queue_tail(&session->reorder_q, skb); } +out: return 0; discard: @@ -1852,6 +1874,10 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn else session->nr_max = 0xffffff; session->nr_window_size = session->nr_max / 2; + session->nr_oos_count_max = 4; + + /* Use NR of first received packet */ + session->reorder_skip = 1; sprintf(&session->name[0], "sess %u/%u", tunnel->tunnel_id, session->session_id); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 4b9a3b72442..66a559b104b 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -104,6 +104,9 @@ struct l2tp_session { struct sk_buff_head reorder_q; /* receive reorder queue */ u32 nr_max; /* max NR. Depends on tunnel */ u32 nr_window_size; /* NR window size */ + u32 nr_oos; /* NR of last OOS packet */ + int nr_oos_count; /* For OOS recovery */ + int nr_oos_count_max; struct hlist_node hlist; /* Hash list node */ atomic_t ref_count; -- cgit v1.2.3 From 23a3647bc4f93bac3776c66dc2c7f7f68b3cd662 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 2 Jul 2013 10:57:33 -0700 Subject: ip_tunnels: Use skb-len to PMTU check. 
In path mtu check, ip header total length works for gre device but not for gre-tap device. Use skb len which is consistent for all tunneling types. This is old bug in gre. This also fixes mtu calculation bug introduced by commit c54419321455631079c7d (GRE: Refactor GRE tunneling code). Reported-by: Timo Teras Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 99 +++++++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 394cebc96d2..945734b2f20 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -472,6 +472,54 @@ drop: } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); +static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + struct rtable *rt, __be16 df) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int pkt_size = skb->len - tunnel->hlen; + int mtu; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr) - tunnel->hlen; + else + mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && + (df & htons(IP_DF)) && mtu < pkt_size) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + return -E2BIG; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < pkt_size) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -E2BIG; + } + } +#endif + return 0; +} + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol) { @@ -483,7 +531,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct rtable *rt; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int mtu; int err; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -560,51 +607,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->stats.collisions++; goto tx_error; } - df = tnl_params->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (inner_iph->frag_off&htons(IP_DF)); - - if (!skb_is_gso(skb) && - (inner_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(inner_iph->tot_len)) { - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } - } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && - mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - - if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && - mtu < skb->len) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + ip_rt_put(rt); + goto tx_error; } -#endif if (tunnel->net != dev_net(dev)) skb_scrub_packet(skb); @@ -631,6 +638,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP)) + df |= (inner_iph->frag_off&htons(IP_DF)); + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len; if (max_headroom > dev->needed_headroom) { -- cgit v1.2.3 From c50cd357887acf9fd7af3a5d492911bd825555a2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 1 Jul 2013 19:24:00 +0200 Subject: net: gre: move GSO functions to gre_offload Similarly to TCP/UDP offloading, move all related GRE functions to gre_offload.c to make things more explicit and similar to the rest of the code. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/ipv4/Makefile | 1 + net/ipv4/gre.c | 514 ------------------------------------------------- net/ipv4/gre_demux.c | 414 +++++++++++++++++++++++++++++++++++++++ net/ipv4/gre_offload.c | 127 ++++++++++++ 4 files changed, 542 insertions(+), 514 deletions(-) delete mode 100644 net/ipv4/gre.c create mode 100644 net/ipv4/gre_demux.c create mode 100644 net/ipv4/gre_offload.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 86ded0bac9c..4b81e91c80f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o +gre-y := gre_demux.o gre_offload.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c deleted file mode 100644 index ba4803e609b..00000000000 --- a/net/ipv4/gre.c +++ /dev/null @@ -1,514 +0,0 @@ -/* - * GRE over IPv4 demultiplexer driver - * - * Authors: Dmitry Kozlov (xeb@mail.ru) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; -static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; - -int gre_add_protocol(const struct gre_protocol *proto, u8 version) -{ - if (version >= GREPROTO_MAX) - return -EINVAL; - - return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? 
- 0 : -EBUSY; -} -EXPORT_SYMBOL_GPL(gre_add_protocol); - -int gre_del_protocol(const struct gre_protocol *proto, u8 version) -{ - int ret; - - if (version >= GREPROTO_MAX) - return -EINVAL; - - ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? - 0 : -EBUSY; - - if (ret) - return ret; - - synchronize_rcu(); - return 0; -} -EXPORT_SYMBOL_GPL(gre_del_protocol); - -void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, - int hdr_len) -{ - struct gre_base_hdr *greh; - - skb_push(skb, hdr_len); - - greh = (struct gre_base_hdr *)skb->data; - greh->flags = tnl_flags_to_gre_flags(tpi->flags); - greh->protocol = tpi->proto; - - if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - if (tpi->flags&TUNNEL_SEQ) { - *ptr = tpi->seq; - ptr--; - } - if (tpi->flags&TUNNEL_KEY) { - *ptr = tpi->key; - ptr--; - } - if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { - *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, - skb->len, 0)); - } - } -} -EXPORT_SYMBOL_GPL(gre_build_header); - -struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) -{ - int err; - - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - - if (skb_is_gso(skb)) { - err = skb_unclone(skb, GFP_ATOMIC); - if (unlikely(err)) - goto error; - skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; - return skb; - } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { - err = skb_checksum_help(skb); - if (unlikely(err)) - goto error; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_NONE; - - return skb; -error: - kfree_skb(skb); - return ERR_PTR(err); -} -EXPORT_SYMBOL_GPL(gre_handle_offloads); - -static __sum16 check_checksum(struct sk_buff *skb) -{ - __sum16 csum = 0; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - - if (!csum) - break; - /* 
Fall through. */ - - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - break; - } - - return csum; -} - -static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err) -{ - unsigned int ip_hlen = ip_hdrlen(skb); - const struct gre_base_hdr *greh; - __be32 *options; - int hdr_len; - - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) - return -EINVAL; - - tpi->flags = gre_flags_to_tnl_flags(greh->flags); - hdr_len = ip_gre_calc_hlen(tpi->flags); - - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - tpi->proto = greh->protocol; - - options = (__be32 *)(greh + 1); - if (greh->flags & GRE_CSUM) { - if (check_checksum(skb)) { - *csum_err = true; - return -EINVAL; - } - options++; - } - - if (greh->flags & GRE_KEY) { - tpi->key = *options; - options++; - } else - tpi->key = 0; - - if (unlikely(greh->flags & GRE_SEQ)) { - tpi->seq = *options; - options++; - } else - tpi->seq = 0; - - /* WCCP version 1 and 2 protocol decoding. 
- * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); - if ((*(u8 *)options & 0xF0) != 0x40) { - hdr_len += 4; - if (!pskb_may_pull(skb, hdr_len)) - return -EINVAL; - } - } - - return iptunnel_pull_header(skb, hdr_len, tpi->proto); -} - -static int gre_cisco_rcv(struct sk_buff *skb) -{ - struct tnl_ptk_info tpi; - int i; - bool csum_err = false; - - if (parse_gre_header(skb, &tpi, &csum_err) < 0) - goto drop; - - rcu_read_lock(); - for (i = 0; i < GRE_IP_PROTO_MAX; i++) { - struct gre_cisco_protocol *proto; - int ret; - - proto = rcu_dereference(gre_cisco_proto_list[i]); - if (!proto) - continue; - ret = proto->handler(skb, &tpi); - if (ret == PACKET_RCVD) { - rcu_read_unlock(); - return 0; - } - } - rcu_read_unlock(); - - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); -drop: - kfree_skb(skb); - return 0; -} - -static void gre_cisco_err(struct sk_buff *skb, u32 info) -{ - /* All the routers (except for Linux) return only - * 8 bytes of packet payload. It means, that precise relaying of - * ICMP in the real Internet is absolutely infeasible. - * - * Moreover, Cisco "wise men" put GRE key to the third word - * in GRE header. It makes impossible maintaining even soft - * state for keyed - * GRE tunnels with enabled checksum. Tell them "thank you". - * - * Well, I wonder, rfc1812 was written by Cisco employee, - * what the hell these idiots break standards established - * by themselves??? - */ - - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct tnl_ptk_info tpi; - bool csum_err = false; - int i; - - if (parse_gre_header(skb, &tpi, &csum_err)) { - if (!csum_err) /* ignore csum errors. 
*/ - return; - } - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - skb->dev->ifindex, 0, IPPROTO_GRE, 0); - return; - } - if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, - IPPROTO_GRE, 0); - return; - } - - rcu_read_lock(); - for (i = 0; i < GRE_IP_PROTO_MAX; i++) { - struct gre_cisco_protocol *proto; - - proto = rcu_dereference(gre_cisco_proto_list[i]); - if (!proto) - continue; - - if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) - goto out; - - } -out: - rcu_read_unlock(); -} - -static int gre_rcv(struct sk_buff *skb) -{ - const struct gre_protocol *proto; - u8 ver; - int ret; - - if (!pskb_may_pull(skb, 12)) - goto drop; - - ver = skb->data[1]&0x7f; - if (ver >= GREPROTO_MAX) - goto drop; - - rcu_read_lock(); - proto = rcu_dereference(gre_proto[ver]); - if (!proto || !proto->handler) - goto drop_unlock; - ret = proto->handler(skb); - rcu_read_unlock(); - return ret; - -drop_unlock: - rcu_read_unlock(); -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -static void gre_err(struct sk_buff *skb, u32 info) -{ - const struct gre_protocol *proto; - const struct iphdr *iph = (const struct iphdr *)skb->data; - u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; - - if (ver >= GREPROTO_MAX) - return; - - rcu_read_lock(); - proto = rcu_dereference(gre_proto[ver]); - if (proto && proto->err_handler) - proto->err_handler(skb, info); - rcu_read_unlock(); -} - -static struct sk_buff *gre_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - netdev_features_t enc_features; - int ghl = GRE_HEADER_SECTION; - struct gre_base_hdr *greh; - int mac_len = skb->mac_len; - __be16 protocol = skb->protocol; - int tnl_hlen; - bool csum; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_GRE))) - goto out; - - if 
(unlikely(!pskb_may_pull(skb, sizeof(*greh)))) - goto out; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - - if (greh->flags & GRE_KEY) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_SEQ) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_CSUM) { - ghl += GRE_HEADER_SECTION; - csum = true; - } else - csum = false; - - /* setup inner skb. */ - skb->protocol = greh->protocol; - skb->encapsulation = 0; - - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - __skb_pull(skb, ghl); - skb_reset_mac_header(skb); - skb_set_network_header(skb, skb_inner_network_offset(skb)); - skb->mac_len = skb_inner_network_offset(skb); - - /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); - segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) - goto out; - - skb = segs; - tnl_hlen = skb_tnl_header_len(skb); - do { - __skb_push(skb, ghl); - if (csum) { - __be32 *pcsum; - - if (skb_has_shared_frag(skb)) { - int err; - - err = __skb_linearize(skb); - if (err) { - kfree_skb(segs); - segs = ERR_PTR(err); - goto out; - } - } - - greh = (struct gre_base_hdr *)(skb->data); - pcsum = (__be32 *)(greh + 1); - *pcsum = 0; - *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); - } - __skb_push(skb, tnl_hlen - ghl); - - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); - skb->mac_len = mac_len; - skb->protocol = protocol; - } while ((skb = skb->next)); -out: - return segs; -} - -static int gre_gso_send_check(struct sk_buff *skb) -{ - if (!skb->encapsulation) - return -EINVAL; - return 0; -} - -static const struct net_protocol net_gre_protocol = { - .handler = gre_rcv, - .err_handler = gre_err, - .netns_ok = 1, -}; - -static const struct net_offload gre_offload = { - .callbacks = { - .gso_send_check = gre_gso_send_check, - .gso_segment = gre_gso_segment, - }, -}; - -static const struct gre_protocol ipgre_protocol = { - .handler = gre_cisco_rcv, - .err_handler = gre_cisco_err, -}; 
- -int gre_cisco_register(struct gre_cisco_protocol *newp) -{ - struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) - &gre_cisco_proto_list[newp->priority]; - - return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY; -} -EXPORT_SYMBOL_GPL(gre_cisco_register); - -int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) -{ - struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) - &gre_cisco_proto_list[del_proto->priority]; - int ret; - - ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL; - - if (ret) - return ret; - - synchronize_net(); - return 0; -} -EXPORT_SYMBOL_GPL(gre_cisco_unregister); - -static int __init gre_init(void) -{ - pr_info("GRE over IPv4 demultiplexor driver\n"); - - if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { - pr_err("can't add protocol\n"); - goto err; - } - - if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { - pr_info("%s: can't add ipgre handler\n", __func__); - goto err_gre; - } - - if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { - pr_err("can't add protocol offload\n"); - goto err_gso; - } - - return 0; -err_gso: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); -err_gre: - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); -err: - return -EAGAIN; -} - -static void __exit gre_exit(void) -{ - inet_del_offload(&gre_offload, IPPROTO_GRE); - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); -} - -module_init(gre_init); -module_exit(gre_exit); - -MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); -MODULE_AUTHOR("D. 
Kozlov (xeb@mail.ru)"); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c new file mode 100644 index 00000000000..736c9fc3ef9 --- /dev/null +++ b/net/ipv4/gre_demux.c @@ -0,0 +1,414 @@ +/* + * GRE over IPv4 demultiplexer driver + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; +static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + return -EINVAL; + + return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? + 0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_add_protocol); + +int gre_del_protocol(const struct gre_protocol *proto, u8 version) +{ + int ret; + + if (version >= GREPROTO_MAX) + return -EINVAL; + + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? 
+ 0 : -EBUSY; + + if (ret) + return ret; + + synchronize_rcu(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_del_protocol); + +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, + int hdr_len) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; + + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; + } + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; + } + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); + } + } +} +EXPORT_SYMBOL_GPL(gre_build_header); + +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) +{ + int err; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(gre_handle_offloads); + +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + + if (!csum) + break; + /* Fall through. 
*/ + + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; + } + + return csum; +} + +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err) +{ + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = ip_gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; + + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; + + /* WCCP version 1 and 2 protocol decoding. 
+ * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + hdr_len += 4; + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + } + } + + return iptunnel_pull_header(skb, hdr_len, tpi->proto); +} + +static int gre_cisco_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + int i; + bool csum_err = false; + + if (parse_gre_header(skb, &tpi, &csum_err) < 0) + goto drop; + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + int ret; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + ret = proto->handler(skb, &tpi); + if (ret == PACKET_RCVD) { + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +drop: + kfree_skb(skb); + return 0; +} + +static void gre_cisco_err(struct sk_buff *skb, u32 info) +{ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + * + * Moreover, Cisco "wise men" put GRE key to the third word + * in GRE header. It makes impossible maintaining even soft + * state for keyed + * GRE tunnels with enabled checksum. Tell them "thank you". + * + * Well, I wonder, rfc1812 was written by Cisco employee, + * what the hell these idiots break standards established + * by themselves??? + */ + + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct tnl_ptk_info tpi; + bool csum_err = false; + int i; + + if (parse_gre_header(skb, &tpi, &csum_err)) { + if (!csum_err) /* ignore csum errors. 
*/ + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + skb->dev->ifindex, 0, IPPROTO_GRE, 0); + return; + } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, + IPPROTO_GRE, 0); + return; + } + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + + if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) + goto out; + + } +out: + rcu_read_unlock(); +} + +static int gre_rcv(struct sk_buff *skb) +{ + const struct gre_protocol *proto; + u8 ver; + int ret; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->handler) + goto drop_unlock; + ret = proto->handler(skb); + rcu_read_unlock(); + return ret; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static void gre_err(struct sk_buff *skb, u32 info) +{ + const struct gre_protocol *proto; + const struct iphdr *iph = (const struct iphdr *)skb->data; + u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; + + if (ver >= GREPROTO_MAX) + return; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (proto && proto->err_handler) + proto->err_handler(skb, info); + rcu_read_unlock(); +} + +static const struct net_protocol net_gre_protocol = { + .handler = gre_rcv, + .err_handler = gre_err, + .netns_ok = 1, +}; + +static const struct gre_protocol ipgre_protocol = { + .handler = gre_cisco_rcv, + .err_handler = gre_cisco_err, +}; + +int gre_cisco_register(struct gre_cisco_protocol *newp) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[newp->priority]; + + return (cmpxchg(proto, NULL, newp) == NULL) ? 
0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_cisco_register); + +int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[del_proto->priority]; + int ret; + + ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL; + + if (ret) + return ret; + + synchronize_net(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_cisco_unregister); + +static int __init gre_init(void) +{ + pr_info("GRE over IPv4 demultiplexor driver\n"); + + if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { + pr_err("can't add protocol\n"); + goto err; + } + + if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { + pr_info("%s: can't add ipgre handler\n", __func__); + goto err_gre; + } + + if (gre_offload_init()) { + pr_err("can't add protocol offload\n"); + goto err_gso; + } + + return 0; +err_gso: + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); +err_gre: + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +err: + return -EAGAIN; +} + +static void __exit gre_exit(void) +{ + gre_offload_exit(); + + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +} + +module_init(gre_init); +module_exit(gre_exit); + +MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c new file mode 100644 index 00000000000..a9d8cd2bff4 --- /dev/null +++ b/net/ipv4/gre_offload.c @@ -0,0 +1,127 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * GRE GSO support + */ + +#include +#include +#include + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + __be16 protocol = skb->protocol; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + skb->protocol = greh->protocol; + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. 
*/ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + skb->protocol = protocol; + } while ((skb = skb->next)); +out: + return segs; +} + +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + +int __init gre_offload_init(void) +{ + return inet_add_offload(&gre_offload, IPPROTO_GRE); +} + +void __exit gre_offload_exit(void) +{ + inet_del_offload(&gre_offload, IPPROTO_GRE); +} -- cgit v1.2.3 From 96e4dac66f69d28af2b736e723364efbbdf9fdee Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 22 May 2013 20:54:25 -0500 Subject: libceph: add lingering request reference when registered When an osd request is set to linger, the osd client holds onto the request so it can be re-submitted following certain osd map changes. The osd client holds a reference to the request until it is unregistered. This is used by rbd for watch requests. Currently, the reference is taken when the request is marked with the linger flag. This means that if an error occurs after that time but before the request completes successfully, that reference is leaked. 
There's really no reason to take the reference until the request is registered in the osd client's list of lingering requests, and that only happens when the lingering (watch) request completes successfully. So take that reference only when it gets registered following successful completion, and drop it (as before) when the request gets unregistered. This avoids the reference problem on error in rbd. Rearrange ceph_osdc_unregister_linger_request() to avoid using the request pointer after it may have been freed. And hold an extra reference in kick_requests() while handling a linger request that has not yet been registered, to ensure it doesn't go away. This resolves: http://tracker.ceph.com/issues/3859 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- net/ceph/osd_client.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3a246a6cab4..e0abb83b520 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1174,6 +1174,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { dout("__register_linger_request %p\n", req); + ceph_osdc_get_request(req); list_add_tail(&req->r_linger_item, &osdc->req_linger); if (req->r_osd) list_add_tail(&req->r_linger_osd, @@ -1196,6 +1197,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, if (list_empty(&req->r_osd_item)) req->r_osd = NULL; } + ceph_osdc_put_request(req); } void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, @@ -1203,9 +1205,8 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, { mutex_lock(&osdc->request_mutex); if (req->r_linger) { - __unregister_linger_request(osdc, req); req->r_linger = 0; - ceph_osdc_put_request(req); + __unregister_linger_request(osdc, req); } mutex_unlock(&osdc->request_mutex); } @@ -1217,11 +1218,6 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, if 
(!req->r_linger) { dout("set_request_linger %p\n", req); req->r_linger = 1; - /* - * caller is now responsible for calling - * unregister_linger_request - */ - ceph_osdc_get_request(req); } } EXPORT_SYMBOL(ceph_osdc_set_request_linger); @@ -1633,8 +1629,10 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("%p tid %llu restart on osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); + ceph_osdc_get_request(req); __unregister_request(osdc, req); __register_linger_request(osdc, req); + ceph_osdc_put_request(req); continue; } -- cgit v1.2.3 From 4974341eb99861720d54db9337bf1fe78eb8b9d0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 29 May 2013 11:19:00 -0500 Subject: libceph: print more info for short message header If an osd client response message arrives that has a front section that's too big for the buffer set aside to receive it, a warning gets reported and a new buffer is allocated. The warning says nothing about which connection had the problem. Add the peer type and number to what gets reported, to be a bit more informative. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- net/ceph/osd_client.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e0abb83b520..61147fe7018 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2454,8 +2454,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, ceph_msg_revoke_incoming(req->r_reply); if (front > req->r_reply->front.iov_len) { - pr_warning("get_reply front %d > preallocated %d\n", - front, (int)req->r_reply->front.iov_len); + pr_warning("get_reply front %d > preallocated %d (%u#%llu)\n", + front, (int)req->r_reply->front.iov_len, + (unsigned int)con->peer_name.type, + le64_to_cpu(con->peer_name.num)); m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false); if (!m) goto out; -- cgit v1.2.3 From eb845ff13a44477f8a411baedbf11d678b9daf0a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 31 May 2013 15:54:44 +0800 Subject: libceph: fix safe completion handle_reply() calls complete_request() only if the first OSD reply has ONDISK flag. 
Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 61147fe7018..3480b058794 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1522,6 +1522,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, for (i = 0; i < numops; i++) req->r_reply_op_result[i] = ceph_decode_32(&p); + already_completed = req->r_got_reply; + if (!req->r_got_reply) { req->r_result = result; @@ -1552,16 +1554,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); - already_completed = req->r_completed; - req->r_completed = 1; mutex_unlock(&osdc->request_mutex); - if (already_completed) - goto done; - if (req->r_callback) - req->r_callback(req, msg); - else - complete_all(&req->r_completion); + if (!already_completed) { + if (req->r_callback) + req->r_callback(req, msg); + else + complete_all(&req->r_completion); + } if (flags & CEPH_OSD_FLAG_ONDISK) complete_request(req); @@ -2121,7 +2121,6 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, __register_request(osdc, req); req->r_sent = 0; req->r_got_reply = 0; - req->r_completed = 0; rc = __map_request(osdc, req, 0); if (rc < 0) { if (nofail) { -- cgit v1.2.3 From ccca4e37b1a912da3db68aee826557ea66145273 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 2 Jun 2013 18:40:23 +0800 Subject: libceph: fix truncate size calculation check the "not truncated yet" case Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3480b058794..540dd29c921 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -733,12 +733,14 @@ struct ceph_osd_request 
*ceph_osdc_new_request(struct ceph_osd_client *osdc, object_size = le32_to_cpu(layout->fl_object_size); object_base = off - objoff; - if (truncate_size <= object_base) { - truncate_size = 0; - } else { - truncate_size -= object_base; - if (truncate_size > object_size) - truncate_size = object_size; + if (!(truncate_seq == 1 && truncate_size == -1ULL)) { + if (truncate_size <= object_base) { + truncate_size = 0; + } else { + truncate_size -= object_base; + if (truncate_size > object_size) + truncate_size = object_size; + } } osd_req_op_extent_init(req, 0, opcode, objoff, objlen, -- cgit v1.2.3 From 2cb33cac622afde897aa02d3dcd9fbba8bae839e Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 20 Jun 2013 13:13:59 -0700 Subject: libceph: Fix NULL pointer dereference in auth client code A malicious monitor can craft an auth reply message that could cause a NULL function pointer dereference in the client's kernel. To prevent this, the auth_none protocol handler needs an empty ceph_auth_client_ops->build_request() function. CVE-2013-1059 Signed-off-by: Tyler Hicks Reported-by: Chanam Park Reviewed-by: Seth Arnold Reviewed-by: Sage Weil Cc: stable@vger.kernel.org --- net/ceph/auth_none.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 925ca583c09..8c93fa8d81b 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -39,6 +39,11 @@ static int should_authenticate(struct ceph_auth_client *ac) return xi->starting; } +static int build_request(struct ceph_auth_client *ac, void *buf, void *end) +{ + return 0; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. 
@@ -106,6 +111,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .destroy = destroy, .is_authenticated = is_authenticated, .should_authenticate = should_authenticate, + .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, -- cgit v1.2.3 From 61c5d6bf7074ee32d014dcdf7698dc8c59eb712d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 24 Jun 2013 14:41:27 +0800 Subject: libceph: call r_unsafe_callback when unsafe reply is received We can't use !req->r_sent to check if OSD request is sent for the first time, this is because __cancel_request() zeros req->r_sent when OSD map changes. Rather than adding a new variable to struct ceph_osd_request to indicate if it's sent for the first time, We can call the unsafe callback only when unsafe OSD reply is received. If OSD's first reply is safe, just skip calling the unsafe callback. The purpose of unsafe callback is adding unsafe request to a list, so that fsync(2) can wait for the safe reply. fsync(2) doesn't need to wait for a write(2) that hasn't returned yet. So it's OK to add request to the unsafe list when the first OSD reply is received. (ceph_sync_write() returns after receiving the first OSD reply) Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 540dd29c921..dd47889adc4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1337,10 +1337,6 @@ static void __send_request(struct ceph_osd_client *osdc, ceph_msg_get(req->r_request); /* send consumes a ref */ - /* Mark the request unsafe if this is the first timet's being sent. 
*/ - - if (!req->r_sent && req->r_unsafe_callback) - req->r_unsafe_callback(req, true); req->r_sent = req->r_osd->o_incarnation; ceph_con_send(&req->r_osd->o_con, req->r_request); @@ -1431,8 +1427,6 @@ static void handle_osds_timeout(struct work_struct *work) static void complete_request(struct ceph_osd_request *req) { - if (req->r_unsafe_callback) - req->r_unsafe_callback(req, false); complete_all(&req->r_safe_completion); /* fsync waiter */ } @@ -1559,14 +1553,20 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, mutex_unlock(&osdc->request_mutex); if (!already_completed) { + if (req->r_unsafe_callback && + result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK)) + req->r_unsafe_callback(req, true); if (req->r_callback) req->r_callback(req, msg); else complete_all(&req->r_completion); } - if (flags & CEPH_OSD_FLAG_ONDISK) + if (flags & CEPH_OSD_FLAG_ONDISK) { + if (req->r_unsafe_callback && already_completed) + req->r_unsafe_callback(req, false); complete_request(req); + } done: dout("req=%p req->r_linger=%d\n", req, req->r_linger); -- cgit v1.2.3 From 4bc41b84e9b4d904f68cba2dbe0c60a5428c27c4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 3 Jul 2013 16:04:25 +0900 Subject: core: Copy inner_protocol in copy_skb_header() inner_protocol was added to struct sk_buff in 0d89d2035fe063461a5ddb609b2c12e7fb006e44 ("MPLS: Add limited GSO support"), which is scheduled to be included in v3.11. That patch did not update __copy_skb_header to copy the inner_protocol. Signed-off-by: Joe Stringer Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 77971a35d6e..724bb7cb173 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -697,6 +697,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; + new->inner_protocol = old->inner_protocol; new->inner_transport_header = old->inner_transport_header; new->inner_network_header = old->inner_network_header; new->inner_mac_header = old->inner_mac_header; -- cgit v1.2.3 From 36b7bfe09b6deb71bf387852465245783c9a6208 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 3 Jul 2013 14:04:14 -0700 Subject: netem: fix possible NULL deref in netem_dequeue() commit aec0a40a6f7884 ("netem: use rb tree to implement the time queue") added a regression if a child qdisc is attached to netem, as we perform a NULL dereference. Fix this by adding a temporary variable to cache netem_skb_cb(skb)->time_to_send. Reported-by: Dan Carpenter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ed0082cf8ef..82f6016d89a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -554,10 +554,13 @@ deliver: } p = rb_first(&q->t_root); if (p) { + psched_time_t time_to_send; + skb = netem_rb_to_skb(p); /* if more time remaining? 
*/ - if (netem_skb_cb(skb)->time_to_send <= psched_get_time()) { + time_to_send = netem_skb_cb(skb)->time_to_send; + if (time_to_send <= psched_get_time()) { rb_erase(p, &q->t_root); sch->q.qlen--; @@ -593,8 +596,7 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule(&q->watchdog, - netem_skb_cb(skb)->time_to_send); + qdisc_watchdog_schedule(&q->watchdog, time_to_send); } if (q->qdisc) { -- cgit v1.2.3 From a1bdc45580fc19e968b32ad27cd7e476a4aa58f6 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 4 Jul 2013 00:52:49 +0900 Subject: net: ipv6: add missing lock in ping_v6_sendmsg Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/ping.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 2b52046e126..10b975577e9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -174,6 +174,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); + lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, 0, hlimit, np->tclass, NULL, &fl6, rt, @@ -188,6 +189,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, (struct icmp6hdr *) &pfh.icmph, len); } + release_sock(sk); return err; } -- cgit v1.2.3 From fbfe80c890a1dc521d0b629b870e32fcffff0da5 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 4 Jul 2013 00:12:40 +0900 Subject: net: ipv6: fix wrong ping_v6_sendmsg return value ping_v6_sendmsg currently returns 0 on success. It should return the number of bytes written instead. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- net/ipv6/ping.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 10b975577e9..18f19df4189 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -191,7 +191,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } release_sock(sk); - return err; + if (err) + return err; + + return len; } #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 3630d40067a21d4dfbadc6002bb469ce26ac5d52 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 3 Jul 2013 20:45:04 +0200 Subject: ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available After the removal of rt->n we do not create a neighbour entry at route insertion time (rt6_bind_neighbour is gone). As long as no neighbour is created because of "useful traffic" we skip this routing entry because rt6_check_neigh cannot pick up a valid neighbour (neigh == NULL) and thus returns false. This change was introduced by commit 887c95cc1da53f66a5890fdeab13414613010097 ("ipv6: Complete neighbour entry removal from dst_entry.") To quote RFC4191: "If the host has no information about the router's reachability, then the host assumes the router is reachable." and also: "A host MUST NOT probe a router's reachability in the absence of useful traffic that the host would have sent to the router if it were reachable." So, just assume the router is reachable and let rt6_probe do the rest. We don't need to create a neighbour on route insertion time. If we don't compile with CONFIG_IPV6_ROUTER_PREF (RFC4191 support) a neighbour is only valid if its nud_state is NUD_VALID. I did not find any references that we should probe the router on route insertion time via the other RFCs. So skip this route in that case. v2: a) use IS_ENABLED instead of #ifdefs (thanks to Sergei Shtylyov) Reported-by: Pierre Emeriaud Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9ff0b78a9c1..bd5fd705403 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -551,6 +551,8 @@ static inline bool rt6_check_neigh(struct rt6_info *rt) ret = true; #endif read_unlock(&neigh->lock); + } else if (IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + ret = true; } rcu_read_unlock_bh(); -- cgit v1.2.3 From 86bd68bfd75941d4cf3b874468791c3e73eef23d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 3 Jul 2013 17:00:34 +0200 Subject: sit: fix tunnel update via netlink The device can stand in another netns, hence we need to do the lookup in netns tunnel->net. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 85ff37b1ce0..a3437a4cd07 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1426,9 +1426,9 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t; + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; - struct net *net = dev_net(dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; -- cgit v1.2.3 From c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 5 Jul 2013 19:36:17 +0800 Subject: bridge: fix some kernel warning in multicast timer Several people reported the warning: "kernel BUG at kernel/timer.c:729!" 
and the stack trace is: #7 [ffff880214d25c10] mod_timer+501 at ffffffff8106d905 #8 [ffff880214d25c50] br_multicast_del_pg.isra.20+261 at ffffffffa0731d25 [bridge] #9 [ffff880214d25c80] br_multicast_disable_port+88 at ffffffffa0732948 [bridge] #10 [ffff880214d25cb0] br_stp_disable_port+154 at ffffffffa072bcca [bridge] #11 [ffff880214d25ce8] br_device_event+520 at ffffffffa072a4e8 [bridge] #12 [ffff880214d25d18] notifier_call_chain+76 at ffffffff8164aafc #13 [ffff880214d25d50] raw_notifier_call_chain+22 at ffffffff810858f6 #14 [ffff880214d25d60] call_netdevice_notifiers+45 at ffffffff81536aad #15 [ffff880214d25d80] dev_close_many+183 at ffffffff81536d17 #16 [ffff880214d25dc0] rollback_registered_many+168 at ffffffff81537f68 #17 [ffff880214d25de8] rollback_registered+49 at ffffffff81538101 #18 [ffff880214d25e10] unregister_netdevice_queue+72 at ffffffff815390d8 #19 [ffff880214d25e30] __tun_detach+272 at ffffffffa074c2f0 [tun] #20 [ffff880214d25e88] tun_chr_close+45 at ffffffffa074c4bd [tun] #21 [ffff880214d25ea8] __fput+225 at ffffffff8119b1f1 #22 [ffff880214d25ef0] ____fput+14 at ffffffff8119b3fe #23 [ffff880214d25f00] task_work_run+159 at ffffffff8107cf7f #24 [ffff880214d25f30] do_notify_resume+97 at ffffffff810139e1 #25 [ffff880214d25f50] int_signal+18 at ffffffff8164f292 this is due to I forgot to check if mp->timer is armed in br_multicast_del_pg(). This bug is introduced by commit 9f00b2e7cf241fa389733d41b6 (bridge: only expire the mdb entry when query is received). Same for __br_mdb_del(). Tested-by: poma Reported-by: LiYonghua <809674045@qq.com> Reported-by: Robert Hancock Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/bridge/br_mdb.c | 2 +- net/bridge/br_multicast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 19942e38fd2..0daae3ec235 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -447,7 +447,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 81befac015e..69af490cce4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -270,7 +270,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); -- cgit v1.2.3 From 2f28c8b31dc501027d9aa6acf496c5941736312b Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Wed, 29 May 2013 12:15:07 -0700 Subject: net/9p: add privport option to 9p tcp transport If the privport option is specified, the tcp transport binds local address to a reserved port before connecting to the 9p server. In some cases when 9P AUTH cannot be implemented, this is better than nothing. 
Signed-off-by: Jim Garlick Signed-off-by: Eric Van Hensbergen --- net/9p/trans_fd.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 02efb25c295..3ffda1b3799 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -63,6 +63,7 @@ struct p9_fd_opts { int rfd; int wfd; u16 port; + int privport; }; /** @@ -87,12 +88,15 @@ struct p9_trans_fd { enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, + /* Options that take no arguments */ + Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, + {Opt_privport, "privport"}, {Opt_err, NULL}, }; @@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); +static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; +static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; + static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; @@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); - if (token != Opt_err) { + if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, @@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) case Opt_wfdno: opts->wfd = option; break; + case Opt_privport: + opts->privport = 1; + break; default: continue; } @@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf) return 0; } +static int p9_bind_privport(struct socket *sock) +{ + struct sockaddr_in cl; + int port, err = -EINVAL; + + memset(&cl, 0, sizeof(cl)); + cl.sin_family = AF_INET; + cl.sin_addr.s_addr = INADDR_ANY; + for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { + cl.sin_port = htons((ushort)port); 
+ err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (err != -EADDRINUSE) + break; + } + return err; +} + + static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { @@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) return err; } + if (opts.privport) { + err = p9_bind_privport(csocket); + if (err < 0) { + pr_err("%s (%d): problem binding to privport\n", + __func__, task_pid_nr(current)); + sock_release(csocket); + return err; + } + } + err = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); -- cgit v1.2.3 From ea071aa1365eaf8a79b33bd8699cb0811dcddf34 Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:34 +0200 Subject: 9P: Fix fcall allocation for rdma The current code assumes that when a request in the request array does have a tc, it also has a rc. This is normally true, but not always : when using RDMA, req->rc will temporarily be set to NULL after the request has been sent. That is usually OK though, as when the reply arrives, req->rc will be reassigned to a sane value before the request is recycled. But there is a catch : if the request is flushed, the reply will never arrive, and req->rc will be NULL, but not req->tc. This patch fixes p9_tag_alloc to take this into account. 
Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 01f1779eba8..5828769d1f3 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -258,27 +258,25 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) req = &c->reqs[row][col]; if (!req->tc) { req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); - if (!req->wq) { - pr_err("Couldn't grow tag array\n"); - return ERR_PTR(-ENOMEM); - } + if (!req->wq) + goto grow_failed; + init_waitqueue_head(req->wq); req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); + if (!req->tc) + goto grow_failed; + + req->tc->capacity = alloc_msize; + req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); + } + if (!req->rc) { req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); - if ((!req->tc) || (!req->rc)) { - pr_err("Couldn't grow tag array\n"); - kfree(req->tc); - kfree(req->rc); - kfree(req->wq); - req->tc = req->rc = NULL; - req->wq = NULL; - return ERR_PTR(-ENOMEM); - } - req->tc->capacity = alloc_msize; + if (!req->rc) + goto grow_failed; + req->rc->capacity = alloc_msize; - req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); } @@ -288,7 +286,16 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; - return &c->reqs[row][col]; + return req; + +grow_failed: + pr_err("Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; + return ERR_PTR(-ENOMEM); } /** -- cgit v1.2.3 From 17b6fd9d6dfa0faed3a25a6045f7456821ea140a Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:35 +0200 Subject: 9P/RDMA: rdma_request() needs not allocate req->rc p9_tag_alloc() takes care of 
that. Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 2c69ddd691a..b1dfdf2078f 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -427,26 +427,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) err = -ENOMEM; goto err_close; } - - /* - * If the request has a buffer, steal it, otherwise - * allocate a new one. Typically, requests should already - * have receive buffers allocated and just swap them around - */ - if (!req->rc) { - req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, - GFP_NOFS); - if (req->rc) { - req->rc->sdata = (char *) req->rc + - sizeof(struct p9_fcall); - req->rc->capacity = client->msize; - } - } rpl_context->rc = req->rc; - if (!rpl_context->rc) { - err = -ENOMEM; - goto err_free2; - } /* * Post a receive buffer for this request. We need to ensure -- cgit v1.2.3 From 5387320d4814aa1e40b50529d960a8f2b3340535 Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:36 +0200 Subject: 9pnet: refactor struct p9_fcall alloc code Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 5828769d1f3..db5bf2480a3 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -204,6 +204,17 @@ free_and_return: return ret; } +struct p9_fcall *p9_fcall_alloc(int alloc_msize) +{ + struct p9_fcall *fc; + fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); + if (!fc) + return NULL; + fc->capacity = alloc_msize; + fc->sdata = (char *) fc + sizeof(struct p9_fcall); + return fc; +} + /** * p9_tag_alloc - lookup/allocate a request by tag * @c: client session to lookup tag within @@ -256,29 +267,19 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned 
int max_size) col = tag % P9_ROW_MAXTAG; req = &c->reqs[row][col]; - if (!req->tc) { + if (!req->wq) { req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); if (!req->wq) goto grow_failed; - init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, - GFP_NOFS); - if (!req->tc) - goto grow_failed; - - req->tc->capacity = alloc_msize; - req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); } - if (!req->rc) { - req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, - GFP_NOFS); - if (!req->rc) - goto grow_failed; - req->rc->capacity = alloc_msize; - req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); - } + if (!req->tc) + req->tc = p9_fcall_alloc(alloc_msize); + if (!req->rc) + req->rc = p9_fcall_alloc(alloc_msize); + if (!req->tc || !req->rc) + goto grow_failed; p9pdu_reset(req->tc); p9pdu_reset(req->rc); -- cgit v1.2.3 From 3fcc62f4e8620fd5f85f957a5e708e69a20adb51 Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:37 +0200 Subject: 9P/RDMA: increase P9_RDMA_MAXSIZE to 1MB The current value is too low to get good performance. Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index b1dfdf2078f..b8b66d38f5b 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -57,9 +57,7 @@ #define P9_RDMA_IRD 0 #define P9_RDMA_ORD 0 #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ -#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can - * safely advertise a maxsize - * of 64k */ +#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ /** * struct p9_trans_rdma - RDMA transport instance -- cgit v1.2.3 From 47229ff85e5a0b0613df2288d212938aeb9687da Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:38 +0200 Subject: 9P/RDMA: Protect against duplicate replies A well-behaved server would not send twice the reply to a request. 
But if it ever happens... This additional check prevents the kernel from leaking memory and possibly more nasty consequences in that unlikely event. Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index b8b66d38f5b..274a9c1d3c3 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -294,6 +294,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, if (!req) goto err_out; + /* Check that we have not yet received a reply for this request. + */ + if (unlikely(req->rc)) { + pr_err("Duplicate reply for request %d", tag); + goto err_out; + } + req->rc = c->rc; req->status = REQ_STATUS_RCVD; p9_client_cb(client, req); -- cgit v1.2.3 From fd453d0ed6c1dacef8eff466df473d62d63db1e9 Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:39 +0200 Subject: 9P/RDMA: Use a semaphore to protect the RQ The current code keeps track of the number of buffers posted in the RQ, and will prevent it from overflowing. But it does so by simply dropping post requests (And leaking memory in the process). When this happens there will actually be too few buffers posted, and soon the 9P server will complain about 'RNR retry counter exceeded' errors. Instead, use a semaphore, and block until the RQ is ready for another buffer to be posted. Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 274a9c1d3c3..ad8dc331574 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -73,7 +73,7 @@ * @sq_depth: The depth of the Send Queue * @sq_sem: Semaphore for the SQ * @rq_depth: The depth of the Receive Queue. - * @rq_count: Count of requests in the Receive Queue. 
+ * @rq_sem: Semaphore for the RQ * @addr: The remote peer's address * @req_lock: Protects the active request list * @cm_done: Completion event for connection management tracking @@ -98,7 +98,7 @@ struct p9_trans_rdma { int sq_depth; struct semaphore sq_sem; int rq_depth; - atomic_t rq_count; + struct semaphore rq_sem; struct sockaddr_in addr; spinlock_t req_lock; @@ -341,8 +341,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) switch (c->wc_op) { case IB_WC_RECV: - atomic_dec(&rdma->rq_count); handle_recv(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->rq_sem); break; case IB_WC_SEND: @@ -441,12 +441,14 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) * outstanding request, so we must keep a count to avoid * overflowing the RQ. */ - if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { - err = post_recv(client, rpl_context); - if (err) - goto err_free1; - } else - atomic_dec(&rdma->rq_count); + if (down_interruptible(&rdma->rq_sem)) + goto error; /* FIXME : -EINTR instead */ + + err = post_recv(client, rpl_context); + if (err) { + p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); + goto err_free1; + } /* remove posted receive buffer from request structure */ req->rc = NULL; @@ -537,7 +539,7 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) spin_lock_init(&rdma->req_lock); init_completion(&rdma->cm_done); sema_init(&rdma->sq_sem, rdma->sq_depth); - atomic_set(&rdma->rq_count, 0); + sema_init(&rdma->rq_sem, rdma->rq_depth); return rdma; } -- cgit v1.2.3 From b530e252e291c27fdcb1b73c72ad17f75c8bdba6 Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:40 +0200 Subject: 9P/RDMA: Do not free req->rc in error handling in rdma_request() rdma_request() should never be in charge of freeing rc. When an error occurs: * Either the rc buffer has been recv_post()'ed. then kfree()'ing it certainly is a bad idea. 
* Or it has not, and in that case req->rc still points to it, hence it need not be freed. Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index ad8dc331574..1bd4c715011 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -447,7 +447,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) err = post_recv(client, rpl_context); if (err) { p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); - goto err_free1; + goto err_free; } /* remove posted receive buffer from request structure */ @@ -457,7 +457,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { err = -ENOMEM; - goto err_free1; + goto err_free; } c->req = req; @@ -486,13 +486,10 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) error: kfree(c); - kfree(rpl_context->rc); kfree(rpl_context); p9_debug(P9_DEBUG_ERROR, "EIO\n"); return -EIO; - err_free1: - kfree(rpl_context->rc); - err_free2: + err_free: kfree(rpl_context); err_close: spin_lock_irqsave(&rdma->req_lock, flags); -- cgit v1.2.3 From 2f52d07cb75d96fcbb5b9ab72938590fa9ffb19d Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:41 +0200 Subject: 9P/RDMA: Improve error handling in rdma_request Most importantly: - do not free the recv context (rpl_context) after a successful post_recv() - but do free the send context (c) after a failed send. 
Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/trans_rdma.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 1bd4c715011..926e72d00e5 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -430,7 +430,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); if (!rpl_context) { err = -ENOMEM; - goto err_close; + goto recv_error; } rpl_context->rc = req->rc; @@ -441,13 +441,15 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) * outstanding request, so we must keep a count to avoid * overflowing the RQ. */ - if (down_interruptible(&rdma->rq_sem)) - goto error; /* FIXME : -EINTR instead */ + if (down_interruptible(&rdma->rq_sem)) { + err = -EINTR; + goto recv_error; + } err = post_recv(client, rpl_context); if (err) { p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); - goto err_free; + goto recv_error; } /* remove posted receive buffer from request structure */ @@ -457,15 +459,17 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { err = -ENOMEM; - goto err_free; + goto send_error; } c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, c->req->tc->sdata, c->req->tc->size, DMA_TO_DEVICE); - if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) - goto error; + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { + err = -EIO; + goto send_error; + } sge.addr = c->busa; sge.length = c->req->tc->size; @@ -479,19 +483,27 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) wr.sg_list = &sge; wr.num_sge = 1; - if (down_interruptible(&rdma->sq_sem)) - goto error; + if (down_interruptible(&rdma->sq_sem)) { + err = -EINTR; + goto send_error; + } - return ib_post_send(rdma->qp, &wr, &bad_wr); + err = 
ib_post_send(rdma->qp, &wr, &bad_wr); + if (err) + goto send_error; - error: + /* Success */ + return 0; + + /* Handle errors that happened during or while preparing the send: */ + send_error: kfree(c); + p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + return err; + + /* Handle errors that happened during or while preparing post_recv(): */ + recv_error: kfree(rpl_context); - p9_debug(P9_DEBUG_ERROR, "EIO\n"); - return -EIO; - err_free: - kfree(rpl_context); - err_close: spin_lock_irqsave(&rdma->req_lock, flags); if (rdma->state < P9_RDMA_CLOSING) { rdma->state = P9_RDMA_CLOSING; -- cgit v1.2.3 From 1cff33069a4a1ac9ed080756113ecd17ad408282 Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:42 +0200 Subject: 9P/RDMA: count posted buffers without a pending request In rdma_request(): If an error occurs between posting the recv and the send, there will be a reply context posted without a pending request. Since there is no way to "un-post" it, we remember it and skip post_recv() for the next request. Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 6 ++++-- net/9p/trans_rdma.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index db5bf2480a3..d18a0b22f62 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -656,8 +656,10 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) return PTR_ERR(req); - /* if we haven't received a response for oldreq, - remove it from the list. */ + /* + * if we haven't received a response for oldreq, + * remove it from the list. 
+ */ spin_lock(&c->lock); if (oldreq->status == REQ_STATUS_FLSH) list_del(&oldreq->req_list); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 926e72d00e5..8f68df5d297 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -74,6 +74,8 @@ * @sq_sem: Semaphore for the SQ * @rq_depth: The depth of the Receive Queue. * @rq_sem: Semaphore for the RQ + * @excess_rc : Amount of posted Receive Contexts without a pending request. + * See rdma_request() * @addr: The remote peer's address * @req_lock: Protects the active request list * @cm_done: Completion event for connection management tracking @@ -99,6 +101,7 @@ struct p9_trans_rdma { struct semaphore sq_sem; int rq_depth; struct semaphore rq_sem; + atomic_t excess_rc; struct sockaddr_in addr; spinlock_t req_lock; @@ -426,6 +429,26 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) struct p9_rdma_context *c = NULL; struct p9_rdma_context *rpl_context = NULL; + /* When an error occurs between posting the recv and the send, + * there will be a receive context posted without a pending request. + * Since there is no way to "un-post" it, we remember it and skip + * post_recv() for the next request. + * So here, + * see if we are this `next request' and need to absorb an excess rc. + * If yes, then drop and free our own, and do not recv_post(). + **/ + if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { + if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { + /* Got one ! */ + kfree(req->rc); + req->rc = NULL; + goto dont_need_post_recv; + } else { + /* We raced and lost. 
*/ + atomic_inc(&rdma->excess_rc); + } + } + /* Allocate an fcall for the reply */ rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); if (!rpl_context) { @@ -451,10 +474,10 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); goto recv_error; } - /* remove posted receive buffer from request structure */ req->rc = NULL; +dont_need_post_recv: /* Post the request */ c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { @@ -499,6 +522,11 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) send_error: kfree(c); p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + + /* Ach. + * We did recv_post(), but not send. We have one recv_post in excess. + */ + atomic_inc(&rdma->excess_rc); return err; /* Handle errors that happened during or while preparing post_recv(): */ @@ -549,6 +577,7 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) init_completion(&rdma->cm_done); sema_init(&rdma->sq_sem, rdma->sq_depth); sema_init(&rdma->rq_sem, rdma->rq_depth); + atomic_set(&rdma->excess_rc, 0); return rdma; } -- cgit v1.2.3 From 80b45261a0b263536b043c5ccfc4ba4fc27c2acc Mon Sep 17 00:00:00 2001 From: Simon Derr Date: Fri, 21 Jun 2013 15:32:43 +0200 Subject: 9P: Add cancelled() to the transport functions. RDMA needs to post a buffer for each incoming reply. Hence it needs to keep count of these and needs to be aware of whether a flushed request has received a reply or not. This patch adds the cancelled() callback to the transport modules. It is called when RFLUSH has been received and that the corresponding request will never receive a reply. 
Signed-off-by: Simon Derr Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 12 +++++++++--- net/9p/trans_rdma.c | 11 +++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index d18a0b22f62..8b93cae2d11 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -658,12 +658,18 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) /* * if we haven't received a response for oldreq, - * remove it from the list. + * remove it from the list, and notify the transport + * layer that the reply will never arrive. */ spin_lock(&c->lock); - if (oldreq->status == REQ_STATUS_FLSH) + if (oldreq->status == REQ_STATUS_FLSH) { list_del(&oldreq->req_list); - spin_unlock(&c->lock); + spin_unlock(&c->lock); + if (c->trans_mod->cancelled) + c->trans_mod->cancelled(c, req); + } else { + spin_unlock(&c->lock); + } p9_free_req(c, req); return 0; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 8f68df5d297..928f2bb9bf8 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -588,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) return 1; } +/* A request has been fully flushed without a reply. + * That means we have posted one buffer in excess. + */ +static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + + atomic_inc(&rdma->excess_rc); + return 0; +} + /** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance -- cgit v1.2.3 From 0979292bfa301cb87d936b69af428090d2feea1b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Jul 2013 13:44:45 -0400 Subject: svcrpc: fix failures to handle -1 uid's As of f025adf191924e3a75ce80e130afcd2485b53bb8 "sunrpc: Properly decode kuids and kgids in RPC_AUTH_UNIX credentials" any rpc containing a -1 (0xffff) uid or gid would fail with a badcred error. 
Commit afe3c3fd5392b2f0066930abc5dbd3f4b14a0f13 "svcrpc: fix failures to handle -1 uid's and gid's" fixed part of the problem, but overlooked the gid upcall--the kernel can request supplementary gid's for the -1 uid, but mountd's attempt to write a response will get -EINVAL. Symptoms were nfsd failing to reply to the first attempt to use a newly negotiated krb5 context. Reported-by: Sven Geggus Tested-by: Sven Geggus Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a98853dfccd..621ca7b4a15 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd, if (rv) return -EINVAL; uid = make_kuid(&init_user_ns, id); - if (!uid_valid(uid)) - return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); -- cgit v1.2.3 From cbf55001b2ddb814329735641be5d29b08c82b08 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 8 Jul 2013 16:20:34 +0300 Subject: net: rename low latency sockets functions to busy poll Rename functions in include/net/ll_poll.h to busy wait. Clarify documentation about expected power use increase. Rename POLL_LL to POLL_BUSY_LOOP. Add need_resched() testing to poll/select busy loops. Note, that in select and poll can_busy_poll is dynamic and is updated continuously to reflect the existence of supported sockets with valid queue information. Signed-off-by: Eliezer Tamir Signed-off-by: David S. 
Miller --- net/core/datagram.c | 3 ++- net/ipv4/tcp.c | 6 +++--- net/socket.c | 12 ++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 9cbaba98ce4..6e9ab31e457 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -208,7 +208,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); - if (sk_valid_ll(sk) && sk_poll_ll(sk, flags & MSG_DONTWAIT)) + if (sk_can_busy_loop(sk) && + sk_busy_loop(sk, flags & MSG_DONTWAIT)) continue; /* User doesn't want to wait */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46ed9afd1f5..15cbfa94bd8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1554,9 +1554,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sk_buff *skb; u32 urg_hole = 0; - if (sk_valid_ll(sk) && skb_queue_empty(&sk->sk_receive_queue) - && (sk->sk_state == TCP_ESTABLISHED)) - sk_poll_ll(sk, nonblock); + if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + (sk->sk_state == TCP_ESTABLISHED)) + sk_busy_loop(sk, nonblock); lock_sock(sk); diff --git a/net/socket.c b/net/socket.c index 4da14cbd49b..45afa648364 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1148,7 +1148,7 @@ EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) { - unsigned int ll_flag = 0; + unsigned int busy_flag = 0; struct socket *sock; /* @@ -1156,16 +1156,16 @@ static unsigned int sock_poll(struct file *file, poll_table *wait) */ sock = file->private_data; - if (sk_valid_ll(sock->sk)) { + if (sk_can_busy_loop(sock->sk)) { /* this socket can poll_ll so tell the system call */ - ll_flag = POLL_LL; + busy_flag = POLL_BUSY_LOOP; /* once, only if requested by syscall */ - if (wait && (wait->_key & POLL_LL)) - sk_poll_ll(sock->sk, 1); + if (wait && (wait->_key & POLL_BUSY_LOOP)) + 
sk_busy_loop(sock->sk, 1); } - return ll_flag | sock->ops->poll(file, sock, wait); + return busy_flag | sock->ops->poll(file, sock, wait); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) -- cgit v1.2.3 From 8c2f414ad1b3aa3af05791cd7312eb8ff9d80e0d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Jul 2013 16:17:04 +0200 Subject: net: sctp: confirm route during forward progress This fix has been proposed originally by Vlad Yasevich. He says: When SCTP makes forward progress (receives a SACK that acks new chunks, renegs, or answers 0-window probes) or when HB-ACK arrives, mark the route as confirmed so we don't unnecessarily send NUD probes. Having a simple SCTP client/server that exchange data chunks every 1sec, without this patch ARP requests are sent periodically every 40-60sec. With this fix applied, an ARP request is only done once right at the "session" beginning. Also, when clearing the related ARP cache entry manually during the session, a new request is correctly done. I have only "backported" this to net-next and tested that it works, so full credit goes to Vlad. Signed-off-by: Vlad Yasevich Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/sctp/outqueue.c | 9 +++++++++ net/sctp/sm_sideeffect.c | 6 ++++++ 2 files changed, 15 insertions(+) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index cb80a8e060b..ef9e2bbc0f2 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1334,6 +1334,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, __u8 restart_timer = 0; int bytes_acked = 0; int migrate_bytes = 0; + bool forward_progress = false; sack_ctsn = ntohl(sack->cum_tsn_ack); @@ -1400,6 +1401,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); + forward_progress = true; } if (TSN_lte(tsn, sack_ctsn)) { @@ -1413,6 +1415,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * current RTO. */ restart_timer = 1; + forward_progress = true; if (!tchunk->tsn_gap_acked) { /* @@ -1503,6 +1506,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ transport->error_count = 0; transport->asoc->overall_error_count = 0; + forward_progress = true; /* * While in SHUTDOWN PENDING, we may have started @@ -1576,6 +1580,11 @@ static void sctp_check_transmitted(struct sctp_outq *q, jiffies + transport->rto)) sctp_transport_hold(transport); } + + if (forward_progress) { + if (transport->dst) + dst_confirm(transport->dst); + } } list_splice(&tlist, transmitted_queue); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index cf6f8451822..9da68852ee9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -730,6 +730,12 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); + /* HB-ACK was received for a the proper HB. Consider this + * forward progress. 
+ */ + if (t->dst) + dst_confirm(t->dst); + /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address * using the time value carried in the HEARTBEAT ACK chunk. -- cgit v1.2.3 From 76fa66657900071016f2bae61de28f059f3f2abf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 2 Jul 2013 13:00:52 -0400 Subject: rpc_pipe: set dentry operations at d_alloc time Currently the way these get set is a little convoluted. If the dentry is allocated via lookup from userland, then it gets set by simple_lookup. If it gets allocated when the kernel is populating the directory, then it gets set via __rpc_lookup_create_exclusive, which has to check whether they might already be set. Between both of these, this ensures that all dentries have their d_op pointer set. Instead of doing that, just have them set at d_alloc time by pointing sb->s_d_op at them. With that change, we no longer want the lookup op to set them, so we must move to using our own lookup routine. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 4679df5a6d5..c5f6812ca06 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -480,6 +480,23 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; +/* + * Lookup the data. This is trivial - if the dentry didn't already + * exist, we know it is negative. 
+ */ +static struct dentry * +rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_add(dentry, NULL); + return NULL; +} + +const struct inode_operations rpc_dir_inode_operations = { + .lookup = rpc_lookup, +}; + static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { @@ -492,7 +509,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; - inode->i_op = &simple_dir_inode_operations; + inode->i_op = &rpc_dir_inode_operations; inc_nlink(inode); default: break; @@ -666,11 +683,8 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, if (!dentry) return ERR_PTR(-ENOMEM); } - if (dentry->d_inode == NULL) { - if (!dentry->d_op) - d_set_d_op(dentry, &rpc_dentry_operations); + if (dentry->d_inode == NULL) return dentry; - } dput(dentry); return ERR_PTR(-EEXIST); } @@ -1117,6 +1131,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; + sb->s_d_op = &rpc_dentry_operations; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); -- cgit v1.2.3 From 4f8568cb5290295c384d5c1328c52790e33a8a0d Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Wed, 10 Jul 2013 09:17:14 +0800 Subject: rpc_pipe: rpc_dir_inode_operations can be static Hi Jeff, FYI, there are new sparse warnings show up in tree: git://git.linux-nfs.org/projects/trondmy/linux-nfs.git nfs-for-next head: 296afe1f58d55fd56ed85daaafafcfee39f59ece commit: 76fa66657900071016f2bae61de28f059f3f2abf [2/5] rpc_pipe: set dentry operations at d_alloc time >> net/sunrpc/rpc_pipe.c:496:31: sparse: symbol 'rpc_dir_inode_operations' was not declared. Should it be static? 
Please consider folding the attached diff :-) Signed-off-by: Fengguang Wu Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index c5f6812ca06..61239a2cb78 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -493,7 +493,7 @@ rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) return NULL; } -const struct inode_operations rpc_dir_inode_operations = { +static const struct inode_operations rpc_dir_inode_operations = { .lookup = rpc_lookup, }; -- cgit v1.2.3 From eeee245268c951262b861bc1be4e9dc812352499 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 10 Jul 2013 15:33:01 -0400 Subject: SUNRPC: Fix a deadlock in rpc_client_register() Commit 384816051ca9125cd54750e59c780c2a2655fa4f (SUNRPC: fix races on PipeFS MOUNT notifications) introduces a regression when we call rpc_setup_pipedir() with RPCSEC_GSS as the auth flavour. By calling rpcauth_create() while holding the sn->pipefs_sb_lock, we end up deadlocking in gss_pipes_dentries_create_net(). Fix is to register the client and release the mutex before calling rpcauth_create(). 
Reported-by: Weston Andros Adamson Tested-by: Weston Andros Adamson Cc: Stanislav Kinsbursky Cc: # : 3848160: SUNRPC: fix races on PipeFS MOUNT Cc: # : e73f4cc: SUNRPC: split client creation Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f0339ae9bf3..aa401560777 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -290,7 +290,7 @@ static int rpc_client_register(const struct rpc_create_args *args, struct rpc_auth *auth; struct net *net = rpc_net_ns(clnt); struct super_block *pipefs_sb; - int err = 0; + int err; pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { @@ -299,6 +299,10 @@ static int rpc_client_register(const struct rpc_create_args *args, goto out; } + rpc_register_client(clnt); + if (pipefs_sb) + rpc_put_sb_net(net); + auth = rpcauth_create(args->authflavor, clnt); if (IS_ERR(auth)) { dprintk("RPC: Couldn't create auth handle (flavor %u)\n", @@ -306,16 +310,14 @@ static int rpc_client_register(const struct rpc_create_args *args, err = PTR_ERR(auth); goto err_auth; } - - rpc_register_client(clnt); + return 0; +err_auth: + pipefs_sb = rpc_get_sb_net(net); + __rpc_clnt_remove_pipedir(clnt); out: if (pipefs_sb) rpc_put_sb_net(net); return err; - -err_auth: - __rpc_clnt_remove_pipedir(clnt); - goto out; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) -- cgit v1.2.3 From 076bb0c82a44fbe46fe2c8527a5b5b64b69f679d Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:17 +0300 Subject: net: rename include/net/ll_poll.h to include/net/busy_poll.h Rename the file and correct all the places where it is included. Signed-off-by: Eliezer Tamir Signed-off-by: David S. 
Miller --- net/core/datagram.c | 2 +- net/core/sock.c | 2 +- net/core/sysctl_net_core.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- net/socket.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 6e9ab31e457..8ab48cd8955 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include /* * Is a socket 'connection oriented' ? diff --git a/net/core/sock.c b/net/core/sock.c index ab06b719f5b..9bfe83f4d67 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,7 +139,7 @@ #include #endif -#include +#include static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index afc677eadd9..1a298cb3dae 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include static int one = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 15cbfa94bd8..5423223e93c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,7 +279,7 @@ #include #include -#include +#include int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 35675e46aff..3a261b41a00 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -75,7 +75,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6b270e53c20..bcc0ff2c16d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -109,7 +109,7 @@ #include #include #include -#include +#include #include "udp_impl.h" struct udp_table udp_table __read_mostly; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5cffa5c3e6b..345bd92d4dd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -63,7 +63,7 @@ #include #include #include -#include 
+#include #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b6f31437a1f..40e72034da0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/socket.c b/net/socket.c index 45afa648364..6a3e9a3f50a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,7 +104,7 @@ #include #include #include -#include +#include #ifdef CONFIG_NET_LL_RX_POLL unsigned int sysctl_net_ll_read __read_mostly; -- cgit v1.2.3 From 8b80cda536ea9bceec0364e897868a30ee13b992 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:26 +0300 Subject: net: rename ll methods to busy-poll Rename ndo_ll_poll to ndo_busy_poll. Rename sk_mark_ll to sk_mark_napi_id. Rename skb_mark_ll to skb_mark_napi_id. Correct all users of these functions. Update comments and defines in include/net/busy_poll.h Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3a261b41a00..b299da5ff49 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1994,7 +1994,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bcc0ff2c16d..a0d7151ffbd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1713,7 +1713,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 345bd92d4dd..6e1649d5853 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1499,7 +1499,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - 
sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 40e72034da0..f4058150262 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -844,7 +844,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_ll(sk, skb); + sk_mark_napi_id(sk, skb); ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); -- cgit v1.2.3 From 64b0dc517ea1b35d02565a779e6cb77ae9045685 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:36 +0300 Subject: net: rename busy poll socket op and globals Rename LL_SO to BUSY_POLL_SO Rename sysctl_net_ll_{read,poll} to sysctl_busy_{read,poll} Fix up users of these variables. Fix documentation for sysctl. a patch for the socket.7 man page will follow separately, because of limitations of my mail setup. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- net/core/sock.c | 6 +++--- net/core/sysctl_net_core.c | 8 ++++---- net/socket.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 9bfe83f4d67..548d716c5f6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -901,7 +901,7 @@ set_rcvbuf: break; #ifdef CONFIG_NET_LL_RX_POLL - case SO_LL: + case SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) ret = -EPERM; @@ -1171,7 +1171,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; #ifdef CONFIG_NET_LL_RX_POLL - case SO_LL: + case SO_BUSY_POLL: v.val = sk->sk_ll_usec; break; #endif @@ -2294,7 +2294,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) #ifdef CONFIG_NET_LL_RX_POLL sk->sk_napi_id = 0; - sk->sk_ll_usec = sysctl_net_ll_read; + sk->sk_ll_usec = sysctl_net_busy_read; #endif /* diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 1a298cb3dae..66096861663 100644 --- 
a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -300,15 +300,15 @@ static struct ctl_table net_core_table[] = { #endif /* CONFIG_NET_FLOW_LIMIT */ #ifdef CONFIG_NET_LL_RX_POLL { - .procname = "low_latency_poll", - .data = &sysctl_net_ll_poll, + .procname = "busy_poll", + .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec }, { - .procname = "low_latency_read", - .data = &sysctl_net_ll_read, + .procname = "busy_read", + .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec diff --git a/net/socket.c b/net/socket.c index 6a3e9a3f50a..829b460acb8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -107,8 +107,8 @@ #include #ifdef CONFIG_NET_LL_RX_POLL -unsigned int sysctl_net_ll_read __read_mostly; -unsigned int sysctl_net_ll_poll __read_mostly; +unsigned int sysctl_net_busy_read __read_mostly; +unsigned int sysctl_net_busy_poll __read_mostly; #endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -- cgit v1.2.3 From 1eb4f758286884e7566627164bca4c4a16952a83 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 10 Jul 2013 23:00:57 +0200 Subject: ipv6: in case of link failure remove route directly instead of letting it expire We could end up expiring a route which is part of an ecmp route set. Doing so would invalidate the rt->rt6i_nsiblings calculations and could provoke the following panic: [ 80.144667] ------------[ cut here ]------------ [ 80.145172] kernel BUG at net/ipv6/ip6_fib.c:733! 
[ 80.145172] invalid opcode: 0000 [#1] SMP [ 80.145172] Modules linked in: 8021q nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables +snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer virtio_balloon snd soundcore i2c_piix4 i2c_core virtio_net virtio_blk [ 80.145172] CPU: 1 PID: 786 Comm: ping6 Not tainted 3.10.0+ #118 [ 80.145172] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 80.145172] task: ffff880117fa0000 ti: ffff880118770000 task.ti: ffff880118770000 [ 80.145172] RIP: 0010:[] [] fib6_add+0x75d/0x830 [ 80.145172] RSP: 0018:ffff880118771798 EFLAGS: 00010202 [ 80.145172] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011350e480 [ 80.145172] RDX: ffff88011350e238 RSI: 0000000000000004 RDI: ffff88011350f738 [ 80.145172] RBP: ffff880118771848 R08: ffff880117903280 R09: 0000000000000001 [ 80.145172] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011350f680 [ 80.145172] R13: ffff880117903280 R14: ffff880118771890 R15: ffff88011350ef90 [ 80.145172] FS: 00007f02b5127740(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 [ 80.145172] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 80.145172] CR2: 00007f981322a000 CR3: 00000001181b1000 CR4: 00000000000006e0 [ 80.145172] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 80.145172] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 80.145172] Stack: [ 80.145172] 0000000000000001 ffff880100000000 ffff880100000000 ffff880117903280 [ 80.145172] 0000000000000000 ffff880119a4cf00 0000000000000400 00000000000007fa [ 80.145172] 0000000000000000 0000000000000000 0000000000000000 ffff88011350f680 [ 80.145172] Call Trace: [ 80.145172] [] ? 
rt6_bind_peer+0x4b/0x90 [ 80.145172] [] __ip6_ins_rt+0x45/0x70 [ 80.145172] [] ip6_ins_rt+0x35/0x40 [ 80.145172] [] ip6_pol_route.isra.44+0x3a4/0x4b0 [ 80.145172] [] ip6_pol_route_output+0x2a/0x30 [ 80.145172] [] fib6_rule_action+0xd7/0x210 [ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 [ 80.145172] [] fib_rules_lookup+0xc6/0x140 [ 80.145172] [] fib6_rule_lookup+0x44/0x80 [ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 [ 80.145172] [] ip6_route_output+0x73/0xb0 [ 80.145172] [] ip6_dst_lookup_tail+0x2c3/0x2e0 [ 80.145172] [] ? list_del+0x11/0x40 [ 80.145172] [] ? remove_wait_queue+0x3c/0x50 [ 80.145172] [] ip6_dst_lookup_flow+0x3d/0xa0 [ 80.145172] [] rawv6_sendmsg+0x267/0xc20 [ 80.145172] [] inet_sendmsg+0x63/0xb0 [ 80.145172] [] ? selinux_socket_sendmsg+0x23/0x30 [ 80.145172] [] sock_sendmsg+0xa6/0xd0 [ 80.145172] [] SYSC_sendto+0x128/0x180 [ 80.145172] [] ? update_curr+0xec/0x170 [ 80.145172] [] ? kvm_clock_get_cycles+0x9/0x10 [ 80.145172] [] ? __getnstimeofday+0x3e/0xd0 [ 80.145172] [] SyS_sendto+0xe/0x10 [ 80.145172] [] system_call_fastpath+0x16/0x1b [ 80.145172] Code: fe ff ff 41 f6 45 2a 06 0f 85 ca fe ff ff 49 8b 7e 08 4c 89 ee e8 94 ef ff ff e9 b9 fe ff ff 48 8b 82 28 05 00 00 e9 01 ff ff ff <0f> 0b 49 8b 54 24 30 0d 00 00 40 00 89 83 14 01 00 00 48 89 53 [ 80.145172] RIP [] fib6_add+0x75d/0x830 [ 80.145172] RSP [ 80.387413] ---[ end trace 02f20b7a8b81ed95 ]--- [ 80.390154] Kernel panic - not syncing: Fatal exception in interrupt Cc: Nicolas Dichtel Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bd5fd705403..5b127e09c22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1080,10 +1080,13 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags & RTF_CACHE) - rt6_update_expires(rt, 0); - else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + if (rt->rt6i_flags & RTF_CACHE) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; + } } } -- cgit v1.2.3 From 110ecd69a9feea82a152bbf9b12aba57e6396883 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 11 Jul 2013 13:16:54 -0400 Subject: 9p: fix off by one causing access violations and memory corruption p9_release_pages() would attempt to dereference one value past the end of pages[]. This would cause the following crashes: [ 6293.171817] BUG: unable to handle kernel paging request at ffff8807c96f3000 [ 6293.174146] IP: [] p9_release_pages+0x3b/0x60 [ 6293.176447] PGD 79c5067 PUD 82c1e3067 PMD 82c197067 PTE 80000007c96f3060 [ 6293.180060] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 6293.180060] Modules linked in: [ 6293.180060] CPU: 62 PID: 174043 Comm: modprobe Tainted: G W 3.10.0-next-20130710-sasha #3954 [ 6293.180060] task: ffff8807b803b000 ti: ffff880787dde000 task.ti: ffff880787dde000 [ 6293.180060] RIP: 0010:[] [] p9_release_pages+0x3b/0x60 [ 6293.214316] RSP: 0000:ffff880787ddfc28 EFLAGS: 00010202 [ 6293.214316] RAX: 0000000000000001 RBX: ffff8807c96f2ff8 RCX: 0000000000000000 [ 6293.222017] RDX: ffff8807b803b000 RSI: 0000000000000001 RDI: ffffea001c7e3d40 [ 6293.222017] RBP: ffff880787ddfc48 R08: 0000000000000000 R09: 0000000000000000 [ 6293.222017] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 [ 6293.222017] R13: 0000000000000001 R14: 
ffff8807cc50c070 R15: ffff8807cc50c070 [ 6293.222017] FS: 00007f572641d700(0000) GS:ffff8807f3600000(0000) knlGS:0000000000000000 [ 6293.256784] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 6293.256784] CR2: ffff8807c96f3000 CR3: 00000007c8e81000 CR4: 00000000000006e0 [ 6293.256784] Stack: [ 6293.256784] ffff880787ddfcc8 ffff880787ddfcc8 0000000000000000 ffff880787ddfcc8 [ 6293.256784] ffff880787ddfd48 ffffffff84128be8 ffff880700000002 0000000000000001 [ 6293.256784] ffff8807b803b000 ffff880787ddfce0 0000100000000000 0000000000000000 [ 6293.256784] Call Trace: [ 6293.256784] [] p9_virtio_zc_request+0x598/0x630 [ 6293.256784] [] ? wake_up_bit+0x40/0x40 [ 6293.256784] [] p9_client_zc_rpc+0x111/0x3a0 [ 6293.256784] [] ? sched_clock_cpu+0x108/0x120 [ 6293.256784] [] p9_client_read+0xe1/0x2c0 [ 6293.256784] [] v9fs_file_read+0x90/0xc0 [ 6293.256784] [] vfs_read+0xc3/0x130 [ 6293.256784] [] ? trace_hardirqs_on+0xd/0x10 [ 6293.256784] [] SyS_read+0x62/0xa0 [ 6293.256784] [] tracesys+0xdd/0xe2 [ 6293.256784] Code: 66 90 48 89 fb 41 89 f5 48 8b 3f 48 85 ff 74 29 85 f6 74 25 45 31 e4 66 0f 1f 84 00 00 00 00 00 e8 eb 14 12 fd 41 ff c4 49 63 c4 <48> 8b 3c c3 48 85 ff 74 05 45 39 e5 75 e7 48 83 c4 08 5b 41 5c [ 6293.256784] RIP [] p9_release_pages+0x3b/0x60 [ 6293.256784] RSP [ 6293.256784] CR2: ffff8807c96f3000 [ 6293.256784] ---[ end trace 50822ee72cd360fc ]--- Signed-off-by: Sasha Levin Signed-off-by: David S. 
Miller --- net/9p/trans_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index de8df957867..2ee3879161b 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -24,11 +24,11 @@ */ void p9_release_pages(struct page **pages, int nr_pages) { - int i = 0; - while (pages[i] && nr_pages--) { - put_page(pages[i]); - i++; - } + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); } EXPORT_SYMBOL(p9_release_pages); -- cgit v1.2.3 From afc154e978de1eb11c555bc8bcec1552f75ebc43 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 11 Jul 2013 12:43:42 +0200 Subject: ipv6: fix route selection if kernel is not compiled with CONFIG_IPV6_ROUTER_PREF This is a follow-up patch to 3630d40067a21d4dfbadc6002bb469ce26ac5d52 ("ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available"). Since the removal of rt->n in rt6_info we can end up with a dst == NULL in rt6_check_neigh. In case the kernel is not compiled with CONFIG_IPV6_ROUTER_PREF we should also select a route with unknown NUD state but we must not avoid doing round robin selection on routes with the same target. So introduce and pass down a boolean ``do_rr'' to indicate when we should update rt->rr_ptr. As soon as no route is valid we do backtracking and do a lookup on a higher level in the fib trie. v2: a) Improved rt6_check_neigh logic (no need to create neighbour there) and documented return values. v3: a) Introduce enum rt6_nud_state to get rid of the magic numbers (thanks to David Miller). b) Update and shorten commit message a bit to actually reflect the source. Reported-by: Pierre Emeriaud Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 63 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5b127e09c22..a8c891aa246 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -65,6 +65,12 @@ #include #endif +enum rt6_nud_state { + RT6_NUD_FAIL_HARD = -2, + RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_SUCCEED = 1 +}; + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); @@ -531,28 +537,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline bool rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh; - bool ret = false; + enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - return true; + return RT6_NUD_SUCCEED; rcu_read_lock_bh(); neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) - ret = true; + ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) - ret = true; + ret = RT6_NUD_SUCCEED; #endif read_unlock(&neigh->lock); - } else if (IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { - ret = true; + } else { + ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? 
+ RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; } rcu_read_unlock_bh(); @@ -566,43 +573,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif, m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) - return -1; + return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif - if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) - return -1; + if (strict & RT6_LOOKUP_F_REACHABLE) { + int n = rt6_check_neigh(rt); + if (n < 0) + return n; + } return m; } static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match) + int *mpri, struct rt6_info *match, + bool *do_rr) { int m; + bool match_do_rr = false; if (rt6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); - if (m < 0) + if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + match_do_rr = true; + m = 0; /* lowest valid score */ + } else if (m < 0) { goto out; + } + + if (strict & RT6_LOOKUP_F_REACHABLE) + rt6_probe(rt); if (m > *mpri) { - if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(match); + *do_rr = match_do_rr; *mpri = m; match = rt; - } else if (strict & RT6_LOOKUP_F_REACHABLE) { - rt6_probe(rt); } - out: return match; } static struct rt6_info *find_rr_leaf(struct fib6_node *fn, struct rt6_info *rr_head, - u32 metric, int oif, int strict) + u32 metric, int oif, int strict, + bool *do_rr) { struct rt6_info *rt, *match; int mpri = -1; @@ -610,10 +626,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; } @@ 
-622,15 +638,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; struct net *net; + bool do_rr = false; rt0 = fn->rr_ptr; if (!rt0) fn->rr_ptr = rt0 = fn->leaf; - match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); + match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, + &do_rr); - if (!match && - (strict & RT6_LOOKUP_F_REACHABLE)) { + if (do_rr) { struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ -- cgit v1.2.3 From 3b8ccd447375acebed9af0a3798e1ab4e58bedf4 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Thu, 11 Jul 2013 09:55:51 +0300 Subject: inet: fix spacing in assignment Found using checkpatch.pl Signed-off-by: Camelia Groza Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 6af375afeee..7bd8983dbfc 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -467,7 +467,7 @@ void inet_unhash(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock_bh(lock); - done =__sk_nulls_del_node_init_rcu(sk); + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); -- cgit v1.2.3 From cdbaa0bb26d8116d00be24e6b49043777b382f3a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 10 Jul 2013 17:05:06 -0700 Subject: gso: Update tunnel segmentation to support Tx checksum offload This change makes it so that the GRE and VXLAN tunnels can make use of Tx checksum offload support provided by some drivers via the hw_enc_features. 
Without this fix enabling GSO means sacrificing Tx checksum offload and this actually leads to a performance regression as shown below: Utilization Send Throughput local GSO 10^6bits/s % S state 6276.51 8.39 enabled 7123.52 8.42 disabled To resolve this it was necessary to address two items. First netif_skb_features needed to be updated so that it would correctly handle the Trans Ether Bridging protocol without impacting the need to check for Q-in-Q tagging. To do this it was necessary to update harmonize_features so that it used skb_network_protocol instead of just using the outer protocol. Second it was necessary to update the GRE and UDP tunnel segmentation offloads so that they would reset the encapsulation bit and inner header offsets after the offload was complete. As a result of this change I have seen the following results on an interface with Tx checksum enabled for encapsulated frames: Utilization Send Throughput local GSO 10^6bits/s % S state 7123.52 8.42 disabled 8321.75 5.43 enabled v2: Instead of replacing reference to skb->protocol with skb_network_protocol just replace the protocol reference in harmonize_features to allow for double VLAN tag checks. Signed-off-by: Alexander Duyck Signed-off-by: David S.
Miller --- net/core/dev.c | 14 ++++++-------- net/ipv4/gre_offload.c | 3 +++ net/ipv4/udp.c | 4 +++- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 560dafd83ad..a3d8d44cb7f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2481,10 +2481,10 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - __be16 protocol, netdev_features_t features) + netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, protocol)) { + !can_checksum_protocol(features, skb_network_protocol(skb))) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; @@ -2505,20 +2505,18 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, protocol, features); + return harmonize_features(skb, features); } features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); - if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) { - return harmonize_features(skb, protocol, features); - } else { + if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, protocol, features); - } + + return harmonize_features(skb, features); } EXPORT_SYMBOL(netif_skb_features); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 775d5b532ec..55e6bfb3a28 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -100,6 +100,9 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } __skb_push(skb, tnl_hlen - ghl); + skb_reset_inner_headers(skb); 
+ skb->encapsulation = 1; + skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb->mac_len = mac_len; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a0d7151ffbd..766e6bab911 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2323,6 +2323,9 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, struct udphdr *uh; int udp_offset = outer_hlen - tnl_hlen; + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + skb->mac_len = mac_len; skb_push(skb, outer_hlen); @@ -2345,7 +2348,6 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, uh->check = CSUM_MANGLED_0; } - skb->ip_summed = CHECKSUM_NONE; skb->protocol = protocol; } while ((skb = skb->next)); out: -- cgit v1.2.3 From 87f1369d6e2e820c77cf9eac542eed4dcf036f64 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 10 Jul 2013 15:46:08 +0200 Subject: pkt_sched: sch_qfq: improve efficiency of make_eligible In make_eligible, a mask is used to decide which groups must become eligible: the i-th group becomes eligible only if the i-th bit of the mask (from the right) is set. The mask is computed by left-shifting a 1 by a given number of places, and decrementing the result. The shift is performed on a ULL to avoid problems in case the number of places to shift is higher than 31. On a 32-bit machine, this is more costly than working on an UL. This patch replaces such a costly operation with two cheaper branches. The trick is based on the following fact: in case of a shift of at least 32 places, the resulting mask has at least the 32 less significant bits set, whereas the total number of groups is lower than 32. As a consequence, in this case it is enough to just set the 32 less significant bits of the mask with a cheaper ~0UL. In the other case, the shift can be safely performed on a UL. Reported-by: David S. Miller Reported-by: David Laight Signed-off-by: Paolo Valente Signed-off-by: David S. 
Miller --- net/sched/sch_qfq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 7c195d972bf..8d86a8b5522 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -821,7 +821,14 @@ static void qfq_make_eligible(struct qfq_sched *q) unsigned long old_vslot = q->oldV >> q->min_slot_shift; if (vslot != old_vslot) { - unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1; + unsigned long mask; + int last_flip_pos = fls(vslot ^ old_vslot); + + if (last_flip_pos > 31) /* higher than the number of groups */ + mask = ~0UL; /* make all groups eligible */ + else + mask = (1UL << last_flip_pos) - 1; + qfq_move_groups(q, mask, IR, ER); qfq_move_groups(q, mask, IB, EB); } -- cgit v1.2.3 From 88d4f419a43b474a4524f41f55c36bee13416bdd Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 10 Jul 2013 15:46:09 +0200 Subject: pkt_sched: sch_qfq: remove forward declaration of qfq_update_agg_ts This patch removes the forward declaration of qfq_update_agg_ts, by moving the definition of the function above its first call. This patch also removes a useless forward declaration of qfq_schedule_agg. Reported-by: David S. Miller Signed-off-by: Paolo Valente Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 118 ++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d86a8b5522..a7ab323849b 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1010,9 +1010,61 @@ static inline void charge_actual_service(struct qfq_aggregate *agg) agg->F = agg->S + (u64)service_received * agg->inv_w; } -static inline void qfq_update_agg_ts(struct qfq_sched *q, - struct qfq_aggregate *agg, - enum update_reason reason); +/* Assign a reasonable start time for a new aggregate in group i. 
+ * Admissible values for \hat(F) are multiples of \sigma_i + * no greater than V+\sigma_i . Larger values mean that + * we had a wraparound so we consider the timestamp to be stale. + * + * If F is not stale and F >= V then we set S = F. + * Otherwise we should assign S = V, but this may violate + * the ordering in EB (see [2]). So, if we have groups in ER, + * set S to the F_j of the first group j which would be blocking us. + * We are guaranteed not to move S backward because + * otherwise our group i would still be blocked. + */ +static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg) +{ + unsigned long mask; + u64 limit, roundedF; + int slot_shift = agg->grp->slot_shift; + + roundedF = qfq_round_down(agg->F, slot_shift); + limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); + + if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) { + /* timestamp was stale */ + mask = mask_from(q->bitmaps[ER], agg->grp->index); + if (mask) { + struct qfq_group *next = qfq_ffs(q, mask); + if (qfq_gt(roundedF, next->F)) { + if (qfq_gt(limit, next->F)) + agg->S = next->F; + else /* preserve timestamp correctness */ + agg->S = limit; + return; + } + } + agg->S = q->V; + } else /* timestamp is not stale */ + agg->S = agg->F; +} + +/* Update the timestamps of agg before scheduling/rescheduling it for + * service. In particular, assign to agg->F its maximum possible + * value, i.e., the virtual finish time with which the aggregate + * should be labeled if it used all its budget once in service. 
+ */ +static inline void +qfq_update_agg_ts(struct qfq_sched *q, + struct qfq_aggregate *agg, enum update_reason reason) +{ + if (reason != requeue) + qfq_update_start(q, agg); + else /* just charge agg for the service received */ + agg->S = agg->F; + + agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w; +} static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg); @@ -1135,66 +1187,6 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) return agg; } -/* - * Assign a reasonable start time for a new aggregate in group i. - * Admissible values for \hat(F) are multiples of \sigma_i - * no greater than V+\sigma_i . Larger values mean that - * we had a wraparound so we consider the timestamp to be stale. - * - * If F is not stale and F >= V then we set S = F. - * Otherwise we should assign S = V, but this may violate - * the ordering in EB (see [2]). So, if we have groups in ER, - * set S to the F_j of the first group j which would be blocking us. - * We are guaranteed not to move S backward because - * otherwise our group i would still be blocked. - */ -static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg) -{ - unsigned long mask; - u64 limit, roundedF; - int slot_shift = agg->grp->slot_shift; - - roundedF = qfq_round_down(agg->F, slot_shift); - limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); - - if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) { - /* timestamp was stale */ - mask = mask_from(q->bitmaps[ER], agg->grp->index); - if (mask) { - struct qfq_group *next = qfq_ffs(q, mask); - if (qfq_gt(roundedF, next->F)) { - if (qfq_gt(limit, next->F)) - agg->S = next->F; - else /* preserve timestamp correctness */ - agg->S = limit; - return; - } - } - agg->S = q->V; - } else /* timestamp is not stale */ - agg->S = agg->F; -} - -/* - * Update the timestamps of agg before scheduling/rescheduling it for - * service. 
In particular, assign to agg->F its maximum possible - * value, i.e., the virtual finish time with which the aggregate - * should be labeled if it used all its budget once in service. - */ -static inline void -qfq_update_agg_ts(struct qfq_sched *q, - struct qfq_aggregate *agg, enum update_reason reason) -{ - if (reason != requeue) - qfq_update_start(q, agg); - else /* just charge agg for the service received */ - agg->S = agg->F; - - agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w; -} - -static void qfq_schedule_agg(struct qfq_sched *, struct qfq_aggregate *); - static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); -- cgit v1.2.3 From 8c91e162e058bb91b7766f26f4d5823a21941026 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 11 Jul 2013 13:12:22 -0700 Subject: gre: Fix MTU sizing check for gretap tunnels This change fixes an MTU sizing issue seen with gretap tunnels when non-gso packets are sent from the interface. In my case I was able to reproduce the issue by simply sending a ping of 1421 bytes with the gretap interface created on a device with a standard 1500 mtu. This fix is based on the fact that the tunnel mtu is already adjusted by dev->hard_header_len so it would make sense that any packets being compared against that mtu should also be adjusted by hard_header_len and the tunnel header instead of just the tunnel header. Signed-off-by: Alexander Duyck Reported-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 945734b2f20..ca1cb2d5f6e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -476,7 +476,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rtable *rt, __be16 df) { struct ip_tunnel *tunnel = netdev_priv(dev); - int pkt_size = skb->len - tunnel->hlen; + int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; int mtu; if (df) -- cgit v1.2.3 From d77e41e12744e53ca7f98f920350998b5f00c93a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Jul 2013 17:30:34 +0300 Subject: net/tipc: use %*phC to dump small buffers in hex form Instead of passing each byte by stack let's use nice specifier for that. Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- net/tipc/ib_media.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index ad2e1ec4117..9934a32bfa8 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -292,13 +292,7 @@ static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ return 1; - sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:" - "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", - a->value[0], a->value[1], a->value[2], a->value[3], - a->value[4], a->value[5], a->value[6], a->value[7], - a->value[8], a->value[9], a->value[10], a->value[11], - a->value[12], a->value[13], a->value[14], a->value[15], - a->value[16], a->value[17], a->value[18], a->value[19]); + sprintf(str_buf, "%20phC", a->value); return 0; } -- cgit v1.2.3 From 92338dc2fb33c8526256a458a520af73d9ab2d14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CCosmin?= Date: Fri, 12 Jul 2013 09:33:33 +0300 Subject: net: strict_strtoul is obsolete, use kstrtoul instead 
patch found using checkpatch.pl Signed-off-by: Cosmin Stanescu Signed-off-by: David S. Miller --- net/dns_resolver/dns_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 0a69d075779..f347a2ca7d7 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -118,7 +118,7 @@ dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) if (opt_vlen <= 0) goto bad_option_value; - ret = strict_strtoul(eq, 10, &derrno); + ret = kstrtoul(eq, 10, &derrno); if (ret < 0) goto bad_option_value; -- cgit v1.2.3 From 40dadff26539d1695d2a37b44f66c53158439ae9 Mon Sep 17 00:00:00 2001 From: Sunghan Suh Date: Fri, 12 Jul 2013 16:17:23 +0900 Subject: net: access page->private by using page_private Signed-off-by: Sunghan Suh Signed-off-by: David S. Miller --- net/core/skbuff.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 724bb7cb173..20e02d2605e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -824,7 +824,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) page = alloc_page(gfp_mask); if (!page) { while (head) { - struct page *next = (struct page *)head->private; + struct page *next = (struct page *)page_private(head); put_page(head); head = next; } @@ -834,7 +834,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) memcpy(page_address(page), vaddr + f->page_offset, skb_frag_size(f)); kunmap_atomic(vaddr); - page->private = (unsigned long)head; + set_page_private(page, (unsigned long)head); head = page; } @@ -848,7 +848,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) for (i = num_frags - 1; i >= 0; i--) { __skb_fill_page_desc(skb, i, head, 0, skb_shinfo(skb)->frags[i].size); - head = (struct page *)head->private; + head = (struct page *)page_private(head); } skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; -- cgit v1.2.3 From 
24ab6bec80861d0c55263047e8bf97e460a32e7b Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 12 Jul 2013 11:33:04 -0700 Subject: tcp: account all retransmit failures Change snmp RETRANSFAILS stat to include timeout retransmit failures in addition to other loss recoveries. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3d609490f11..92fde8d1aa8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2407,6 +2407,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * see tcp_input.c tcp_sacktag_write_queue(). */ TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; + } else { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } return err; } @@ -2528,10 +2530,9 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; - if (tcp_retransmit_skb(sk, skb)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + if (tcp_retransmit_skb(sk, skb)) return; - } + NET_INC_STATS_BH(sock_net(sk), mib_idx); if (tcp_in_cwnd_reduction(sk)) -- cgit v1.2.3 From 307f2fb95e9b96b3577916e73d92e104f8f26494 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 12 Jul 2013 23:46:33 +0200 Subject: ipv6: only static routes qualify for equal cost multipathing Static routes in this case are non-expiring routes which did not get configured by autoconf or by icmpv6 redirects. To make sure we actually get an ecmp route while searching for the first one in this fib6_node's leafs, also make sure it matches the ecmp route assumptions. v2: a) Removed RTF_EXPIRE check in dst.from chain. 
The check of RTF_ADDRCONF already ensures that this route, even if added again without RTF_EXPIRES (in case of a RA announcement with infinite timeout), does not cause the rt6i_nsiblings logic to go wrong if a later RA updates the expiration time later. v3: a) Allow RTF_EXPIRES routes to enter the ecmp route set. We have to do so, because an pmtu event could update the RTF_EXPIRES flag and we would not count this route, if another route joins this set. We now filter only for RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC, which are flags that don't get changed after rt6_info construction. Cc: Nicolas Dichtel Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 192dd1a0e18..5fc9c7a68d8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -632,6 +632,12 @@ insert_above: return ln; } +static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + /* * Insert routing information in a node. */ @@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; + bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); ins = &fn->leaf; @@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * To avoid long list, we only had siblings if the * route have a gateway. 
*/ - if (rt->rt6i_flags & RTF_GATEWAY && - !(rt->rt6i_flags & RTF_EXPIRES) && - !(iter->rt6i_flags & RTF_EXPIRES)) + if (rt_can_ecmp && + rt6_qualify_for_ecmp(iter)) rt->rt6i_nsiblings++; } @@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* Find the first route that have the same metric */ sibling = fn->leaf; while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric) { + if (sibling->rt6i_metric == rt->rt6i_metric && + rt6_qualify_for_ecmp(sibling)) { list_add_tail(&rt->rt6i_siblings, &sibling->rt6i_siblings); break; -- cgit v1.2.3 From a95e691f9c4a6e24fdeab6d7feae6d5411fe8a69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Jul 2013 16:43:54 +0400 Subject: rpc_create_*_dir: don't bother with qstr just pass the name Signed-off-by: Al Viro --- net/sunrpc/cache.c | 18 +++++------------- net/sunrpc/clnt.c | 20 ++++++++------------ net/sunrpc/rpc_pipe.c | 14 ++++++++------ 3 files changed, 21 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 80fe5c86efd..b40f9567e62 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1812,19 +1812,11 @@ int sunrpc_cache_register_pipefs(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { - struct qstr q; - struct dentry *dir; - int ret = 0; - - q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len); - dir = rpc_create_cache_dir(parent, &q, umode, cd); - if (!IS_ERR(dir)) - cd->u.pipefs.dir = dir; - else - ret = PTR_ERR(dir); - return ret; + struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd); + if (IS_ERR(dir)) + return PTR_ERR(dir); + cd->u.pipefs.dir = dir; + return 0; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a750b9c364..26456274b24 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -128,9 +128,7 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, { 
static uint32_t clntid; char name[15]; - struct qstr q = { .name = name }; struct dentry *dir, *dentry; - int error; dir = rpc_d_lookup_sb(sb, dir_name); if (dir == NULL) { @@ -138,19 +136,17 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, return dir; } for (;;) { - q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); + snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); name[sizeof(name) - 1] = '\0'; - q.hash = full_name_hash(q.name, q.len); - dentry = rpc_create_client_dir(dir, &q, clnt); + dentry = rpc_create_client_dir(dir, name, clnt); if (!IS_ERR(dentry)) break; - error = PTR_ERR(dentry); - if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry" - " %s/%s, error %d\n", - dir_name, name, error); - break; - } + if (dentry == ERR_PTR(-EEXIST)) + continue; + printk(KERN_INFO "RPC: Couldn't create pipefs entry" + " %s/%s, error %ld\n", + dir_name, name, PTR_ERR(dentry)); + break; } dput(dir); return dentry; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e7ce4b3eb0b..63364cb5d11 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -770,15 +770,17 @@ out_bad: } static struct dentry *rpc_mkdir_populate(struct dentry *parent, - struct qstr *name, umode_t mode, void *private, + const char *name, umode_t mode, void *private, int (*populate)(struct dentry *, void *), void *args_populate) { struct dentry *dentry; + struct qstr q = QSTR_INIT(name, strlen(name)); struct inode *dir = parent->d_inode; int error; + q.hash = full_name_hash(q.name, q.len); mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create_exclusive(parent, name); + dentry = __rpc_lookup_create_exclusive(parent, &q); if (IS_ERR(dentry)) goto out; error = __rpc_mkdir(dir, dentry, mode, NULL, private); @@ -925,8 +927,8 @@ static void rpc_clntdir_depopulate(struct dentry *dentry) /** * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs - * @dentry: dentry from 
the rpc_pipefs root to the new directory - * @name: &struct qstr for the name + * @dentry: the parent of new directory + * @name: the name of new directory * @rpc_client: rpc client to associate with this directory * * This creates a directory at the given @path associated with @@ -935,7 +937,7 @@ static void rpc_clntdir_depopulate(struct dentry *dentry) * later be created using rpc_mkpipe(). */ struct dentry *rpc_create_client_dir(struct dentry *dentry, - struct qstr *name, + const char *name, struct rpc_clnt *rpc_client) { return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, @@ -981,7 +983,7 @@ static void rpc_cachedir_depopulate(struct dentry *dentry) rpc_depopulate(dentry, cache_pipefs_files, 0, 3); } -struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, +struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { return rpc_mkdir_populate(parent, name, umode, NULL, -- cgit v1.2.3 From d3db90b0a448bfdf77ab3d887c9579fead656cc5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Jul 2013 17:09:57 +0400 Subject: __rpc_lookup_create_exclusive: pass string instead of qstr ... and use d_hash_and_lookup() instead of open-coding it, for fsck sake... 
Signed-off-by: Al Viro --- net/sunrpc/rpc_pipe.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 63364cb5d11..27e54d26570 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -656,13 +656,12 @@ static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) } static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, - struct qstr *name) + const char *name) { - struct dentry *dentry; - - dentry = d_lookup(parent, name); + struct qstr q = QSTR_INIT(name, strlen(name)); + struct dentry *dentry = d_hash_and_lookup(parent, &q); if (!dentry) { - dentry = d_alloc(parent, name); + dentry = d_alloc(parent, &q); if (!dentry) return ERR_PTR(-ENOMEM); } @@ -689,8 +688,7 @@ static void __rpc_depopulate(struct dentry *parent, for (i = start; i < eof; i++) { name.name = files[i].name; name.len = strlen(files[i].name); - name.hash = full_name_hash(name.name, name.len); - dentry = d_lookup(parent, &name); + dentry = d_hash_and_lookup(parent, &name); if (dentry == NULL) continue; @@ -732,12 +730,7 @@ static int rpc_populate(struct dentry *parent, mutex_lock(&dir->i_mutex); for (i = start; i < eof; i++) { - struct qstr q; - - q.name = files[i].name; - q.len = strlen(files[i].name); - q.hash = full_name_hash(q.name, q.len); - dentry = __rpc_lookup_create_exclusive(parent, &q); + dentry = __rpc_lookup_create_exclusive(parent, files[i].name); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_bad; @@ -774,13 +767,11 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent, int (*populate)(struct dentry *, void *), void *args_populate) { struct dentry *dentry; - struct qstr q = QSTR_INIT(name, strlen(name)); struct inode *dir = parent->d_inode; int error; - q.hash = full_name_hash(q.name, q.len); mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create_exclusive(parent, &q); + dentry = 
__rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) goto out; error = __rpc_mkdir(dir, dentry, mode, NULL, private); @@ -843,7 +834,6 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, struct dentry *dentry; struct inode *dir = parent->d_inode; umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; - struct qstr q; int err; if (pipe->ops->upcall == NULL) @@ -851,12 +841,8 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, if (pipe->ops->downcall == NULL) umode &= ~S_IWUGO; - q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len), - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create_exclusive(parent, &q); + dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) goto out; err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, @@ -1063,9 +1049,7 @@ struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name) { struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); - - dir.hash = full_name_hash(dir.name, dir.len); - return d_lookup(sb->s_root, &dir); + return d_hash_and_lookup(sb->s_root, &dir); } EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); -- cgit v1.2.3 From dae3794fd603b92dcbac2859fe0bc7fe129a5188 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Jul 2013 17:55:39 +0400 Subject: sunrpc: now we can just set ->s_d_op Signed-off-by: Al Viro --- net/sunrpc/rpc_pipe.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 27e54d26570..260fe72656a 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -665,10 +665,8 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, if (!dentry) return ERR_PTR(-ENOMEM); } - if (dentry->d_inode == NULL) { - d_set_d_op(dentry, &rpc_dentry_operations); + if (dentry->d_inode == NULL) return dentry; - } dput(dentry); return ERR_PTR(-EEXIST); } @@ -1102,6 
+1100,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; + sb->s_d_op = &rpc_dentry_operations; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); -- cgit v1.2.3 From 013dbb325be702d777118d5e4ffefff3dad2b153 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 19 Jun 2013 14:32:33 -0400 Subject: net: delete __cpuinit usage from all net files The __cpuinit type of throwaway sections might have made sense some time ago when RAM was more constrained, but now the savings do not offset the cost and complications. For example, the fix in commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time") is a good example of the nasty type of bugs that can be created with improper use of the various __init prefixes. After a discussion on LKML[1] it was decided that cpuinit should go the way of devinit and be phased out. Once all the users are gone, we can then finally remove the macros themselves from linux/init.h. This removes all the net/* uses of the __cpuinit macros from all C files. [1] https://lkml.org/lkml/2013/5/20/589 Cc: "David S. 
Miller" Cc: netdev@vger.kernel.org Signed-off-by: Paul Gortmaker --- net/core/flow.c | 4 ++-- net/iucv/iucv.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 7102f166482..dfa602ceb8c 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -403,7 +403,7 @@ void flow_cache_flush_deferred(void) schedule_work(&flow_cache_flush_work); } -static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) +static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); @@ -421,7 +421,7 @@ static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) return 0; } -static int __cpuinit flow_cache_cpu(struct notifier_block *nfb, +static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 4fe76ff214c..cd5b8ec9be0 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -621,7 +621,7 @@ static void iucv_disable(void) put_online_cpus(); } -static int __cpuinit iucv_cpu_notify(struct notifier_block *self, +static int iucv_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { cpumask_t cpumask; -- cgit v1.2.3 From f09eca8db0184aeb6b9718a987cfb3653ad7c4ae Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 9 Jul 2013 20:16:39 +0200 Subject: netfilter: ctnetlink: fix incorrect NAT expectation dumping nf_ct_expect_alloc leaves unset the expectation NAT fields. However, ctnetlink_exp_dump_expect expects them to be zeroed in case they are not used, which may not be the case. This results in dumping the NAT tuple of the expectation when it should not. Fix it by zeroing the NAT fields of the expectation. 
Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index c63b618cd61..4fd1ca94fd4 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -293,6 +293,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, sizeof(exp->tuple.dst.u3) - len); exp->tuple.dst.u.all = *dst; + +#ifdef CONFIG_NF_NAT_NEEDED + memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); + memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); +#endif } EXPORT_SYMBOL_GPL(nf_ct_expect_init); -- cgit v1.2.3 From baf60efa585c78b269f0097288868a51ccc61f55 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jul 2013 19:22:19 -0700 Subject: netfilter: xt_socket: fix broken v0 support commit 681f130f39e10 ("netfilter: xt_socket: add XT_SOCKET_NOWILDCARD flag") added a potential NULL dereference if an old iptables package uses v0 of the match. Fix this by removing the test on @info in fast path. IPv6 can remove the test as well, as it uses v1 or v2. 
Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index f8b71911037..20b15916f40 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -172,7 +172,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ - if (info && info->flags & XT_SOCKET_TRANSPARENT) + if (info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || (sk->sk_state == TCP_TIME_WAIT && @@ -196,7 +196,11 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, static bool socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) { - return socket_match(skb, par, NULL); + static struct xt_socket_mtinfo1 xt_info_v0 = { + .flags = 0, + }; + + return socket_match(skb, par, &xt_info_v0); } static bool @@ -314,7 +318,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ - if (info && info->flags & XT_SOCKET_TRANSPARENT) + if (info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || (sk->sk_state == TCP_TIME_WAIT && -- cgit v1.2.3 From 1540c5d3cbf7670eb68a0d02611ec73e5604a91a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Jul 2013 22:57:50 -0400 Subject: SUNRPC: Fix another issue with rpc_client_register() Fix the error pathway if rpcauth_create() fails. 
Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9963584605c..74f6a704e37 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -309,6 +309,7 @@ static int rpc_client_register(const struct rpc_create_args *args, return 0; err_auth: pipefs_sb = rpc_get_sb_net(net); + rpc_unregister_client(clnt); __rpc_clnt_remove_pipedir(clnt); out: if (pipefs_sb) -- cgit v1.2.3 From b2781e1021525649c0b33fffd005ef219da33926 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jul 2013 09:39:03 +0300 Subject: svcrdma: underflow issue in decode_write_list() My static checker marks everything from ntohl() as untrusted and it complains we could have an underflow problem doing: return (u32 *)&ary->wc_array[nchunks]; Also on 32 bit systems the upper bound check could overflow. Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 8d2edddf48c..65b146297f5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -98,6 +98,7 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, */ static u32 *decode_write_list(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = @@ -113,9 +114,12 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct 
rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; @@ -129,6 +133,7 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) static u32 *decode_reply_array(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = (struct rpcrdma_write_array *)va; @@ -143,9 +148,12 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; -- cgit v1.2.3 From a0ec570f4f69c4cb700d743a915096c2c8f56a99 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 25 Jun 2013 09:17:17 +0200 Subject: nl80211: fix mgmt tx status and testmode reporting for netns These two events were sent to the default network namespace. This caused AP mode in a non-default netns to not work correctly. Mgmt tx status was multicasted to a different (default) netns instead of the one the AP was in. 
Cc: stable@vger.kernel.org Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1cc47aca7f0..9fb8820b75c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6613,12 +6613,14 @@ EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) { + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; nla_nest_end(skb, data); genlmsg_end(skb, hdr); - genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0, + nl80211_testmode_mcgrp.id, gfp); } EXPORT_SYMBOL(cfg80211_testmode_event); #endif @@ -10064,7 +10066,8 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, genlmsg_end(msg, hdr); - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: -- cgit v1.2.3 From 923a0e7dee8c436108279568cf34444749ac796f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Jun 2013 11:38:54 +0200 Subject: cfg80211: fix bugs in new SME implementation When splitting the SME implementation from the MLME code, I introduced a few bugs: * association failures no longer sent a connect-failure event * getting disassociated from the AP caused deauth to be sent but state wasn't cleaned up, leading to warnings * authentication failures weren't cleaned up properly, causing new connection attempts to warn and fail Fix these bugs. 
Signed-off-by: Johannes Berg --- net/wireless/sme.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 1d3cfb1a3f2..81c8a10d743 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -34,8 +34,10 @@ struct cfg80211_conn { CFG80211_CONN_SCAN_AGAIN, CFG80211_CONN_AUTHENTICATE_NEXT, CFG80211_CONN_AUTHENTICATING, + CFG80211_CONN_AUTH_FAILED, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, + CFG80211_CONN_ASSOC_FAILED, CFG80211_CONN_DEAUTH, CFG80211_CONN_CONNECTED, } state; @@ -164,6 +166,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) NULL, 0, params->key, params->key_len, params->key_idx, NULL, 0); + case CFG80211_CONN_AUTH_FAILED: + return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -188,10 +192,17 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) WLAN_REASON_DEAUTH_LEAVING, false); return err; + case CFG80211_CONN_ASSOC_FAILED: + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); + return -ENOTCONN; case CFG80211_CONN_DEAUTH: cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); + /* free directly, disconnected event already sent */ + cfg80211_sme_free(wdev); return 0; default: return 0; @@ -371,7 +382,7 @@ bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) return true; } - wdev->conn->state = CFG80211_CONN_DEAUTH; + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; schedule_work(&rdev->conn_work); return false; } @@ -383,7 +394,13 @@ void cfg80211_sme_deauth(struct wireless_dev *wdev) void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - cfg80211_sme_free(wdev); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = 
CFG80211_CONN_AUTH_FAILED; + schedule_work(&rdev->conn_work); } void cfg80211_sme_disassoc(struct wireless_dev *wdev) @@ -399,7 +416,13 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev) void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) { - cfg80211_sme_disassoc(wdev); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; + schedule_work(&rdev->conn_work); } static int cfg80211_sme_connect(struct wireless_dev *wdev, -- cgit v1.2.3 From f77b86d7d3acf9dfcb5ee834628d12207584b2cb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Jun 2013 15:43:38 +0200 Subject: regulatory: add missing rtnl locking restore_regulatory_settings() requires the RTNL to be held, add the missing locking in reg_timeout_work(). Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5a24c986f34..5a950f36bae 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2279,7 +2279,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) static void reg_timeout_work(struct work_struct *work) { REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); + rtnl_lock(); restore_regulatory_settings(true); + rtnl_unlock(); } int __init regulatory_init(void) -- cgit v1.2.3 From 1cd158573951f737fbc878a35cb5eb47bf9af3d5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 28 Jun 2013 21:04:35 +0200 Subject: mac80211/minstrel_ht: fix cck rate sampling The CCK group needs special treatment to set the right flags and rate index. Add this missing check to prevent setting broken rates for tx packets. 
Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5b2d3012b98..f5aed963b22 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -804,10 +804,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + rate->count = 1; + + if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); + rate->idx = mp->cck_rates[idx]; + rate->flags = 0; + return; + } + rate->idx = sample_idx % MCS_GROUP_RATES + (sample_group->streams - 1) * MCS_GROUP_RATES; rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; - rate->count = 1; } static void -- cgit v1.2.3 From e13bae4f807401729b3f27c7e882a96b8b292809 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Jul 2013 10:43:31 +0200 Subject: mac80211: fix ethtool stats for non-station interfaces As reported in https://bugzilla.kernel.org/show_bug.cgi?id=60514, the station loop never initialises 'sinfo' and therefore adds up stack values, leaking stack information (the number of times it adds values is easily obtained another way.) Fix this by initialising the sinfo for each station to add.
Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8184d121ff0..43dd7525bfc 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -666,6 +666,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, if (sta->sdata->dev != dev) continue; + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); i = 0; ADD_STA_STATS(sta); } -- cgit v1.2.3 From 83374fe9de455e37c2a039603d2538eb77e8ec4d Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Thu, 11 Jul 2013 18:24:03 +0800 Subject: nl80211: fix the setting of RSSI threshold value for mesh RSSI threshold value used for mesh peering should be in negative value. After range checks to mesh parameters is introduced, this is not allowed. Fix this. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9fb8820b75c..25d217d9080 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4770,9 +4770,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0, mask, NL80211_MESHCONF_RSSI_THRESHOLD, - nla_get_u32); + nla_get_s32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); -- cgit v1.2.3 From 6b0f32745dcfba01d7be33acd1b40306c7a914c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 11 Jul 2013 22:33:26 +0200 Subject: mac80211: fix duplicate retransmission detection The duplicate retransmission detection code in mac80211 erroneously attempts to do the check for every frame, even frames that don't have a sequence control field or that don't use it (QoS-Null frames.) 
This is problematic because it causes the code to access data beyond the end of the SKB and depending on the data there will drop packets erroneously. Correct the code to not do duplicate detection for such frames. I found this error while testing AP powersave; it led to retransmitted PS-Poll frames being dropped entirely as the data beyond the end of the SKB was always zero. Cc: stable@vger.kernel.org [all versions] Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 23dbcfc69b3..2c5a79bd377 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -936,8 +936,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + /* + * Drop duplicate 802.11 retransmissions + * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") + */ + if (rx->skb->len >= 24 && rx->sta && + !ieee80211_is_ctl(hdr->frame_control) && + !ieee80211_is_qos_nullfunc(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { -- cgit v1.2.3 From 5c9fc93bc9bc417418fc1b6366833ae6a07b804d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 15 Jul 2013 14:35:06 +0200 Subject: mac80211/minstrel: fix NULL pointer dereference issue When priv_sta == NULL, mi->prev_sample is dereferenced too early. Move the assignment further down, after the rate_control_send_low call.
Reported-by: Krzysztof Mazur Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ac7ef5414bd..e6512e2ffd2 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -290,7 +290,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, struct minstrel_rate *msr, *mr; unsigned int ndx; bool mrr_capable; - bool prev_sample = mi->prev_sample; + bool prev_sample; int delta; int sampling_ratio; @@ -314,6 +314,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, (mi->sample_count + mi->sample_deferred / 2); /* delta < 0: no sampling required */ + prev_sample = mi->prev_sample; mi->prev_sample = false; if (delta < 0 || (!mrr_capable && prev_sample)) return; -- cgit v1.2.3 From f2f79cca13e36972978ffd1fb6483c8a1141b510 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Sat, 13 Jul 2013 11:26:51 +0300 Subject: ndisc: bool initializations should use true and false Signed-off-by: Daniel Baluta Signed-off-by: David S. 
Miller --- net/ipv6/ndisc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b3b5730b48c..24c03396e00 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -479,7 +479,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) - override = 0; + override = false; inc_opt |= ifp->idev->cnf.force_tllao; in6_ifa_put(ifp); } else { @@ -557,7 +557,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, } if (ipv6_addr_any(saddr)) - inc_opt = 0; + inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev); @@ -790,7 +790,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && - inc != 0 && + inc && idev->nd_parms->proxy_delay != 0) { /* * for anycast or proxy, -- cgit v1.2.3 From 8a73125c36809527236e1d8d367a09a3f0e9f9e1 Mon Sep 17 00:00:00 2001 From: Dragos Foianu Date: Sat, 13 Jul 2013 14:43:00 +0100 Subject: ethtool: fixed trailing statements in ethtool Applied fixes suggested by checkpatch.pl. Signed-off-by: Dragos Foianu Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index ab5fa6336c8..78e9d9223e4 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -279,11 +279,16 @@ static u32 __ethtool_get_flags(struct net_device *dev) { u32 flags = 0; - if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; - if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN; - if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; - if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; + if (dev->features & NETIF_F_LRO) + flags |= ETH_FLAG_LRO; + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + flags |= ETH_FLAG_RXVLAN; + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + flags |= ETH_FLAG_TXVLAN; + if (dev->features & NETIF_F_NTUPLE) + flags |= ETH_FLAG_NTUPLE; + if (dev->features & NETIF_F_RXHASH) + flags |= ETH_FLAG_RXHASH; return flags; } @@ -295,11 +300,16 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) if (data & ~ETH_ALL_FLAGS) return -EINVAL; - if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; - if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX; - if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX; - if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; - if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; + if (data & ETH_FLAG_LRO) + features |= NETIF_F_LRO; + if (data & ETH_FLAG_RXVLAN) + features |= NETIF_F_HW_VLAN_CTAG_RX; + if (data & ETH_FLAG_TXVLAN) + features |= NETIF_F_HW_VLAN_CTAG_TX; + if (data & ETH_FLAG_NTUPLE) + features |= NETIF_F_NTUPLE; + if (data & ETH_FLAG_RXHASH) + features |= NETIF_F_RXHASH; /* allow changing only bits set in hw_features */ changed = (features ^ dev->features) & ETH_ALL_FEATURES; -- cgit v1.2.3 From b6a82dd233cabcc1517c0744d7a8f0b61f559caf Mon Sep 17 00:00:00 2001 
From: Dragos Foianu Date: Sat, 13 Jul 2013 15:03:55 +0100 Subject: net/irda: fixed style issues in irlan_eth Applied fixes suggested by checkpatch.pl Signed-off-by: Dragos Foianu Signed-off-by: David S. Miller --- net/irda/irlan/irlan_eth.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index d14152e866d..ffcec225b5d 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -44,12 +44,12 @@ static int irlan_eth_open(struct net_device *dev); static int irlan_eth_close(struct net_device *dev); static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); -static void irlan_eth_set_multicast_list( struct net_device *dev); +static void irlan_eth_set_multicast_list(struct net_device *dev); static const struct net_device_ops irlan_eth_netdev_ops = { - .ndo_open = irlan_eth_open, - .ndo_stop = irlan_eth_close, - .ndo_start_xmit = irlan_eth_xmit, + .ndo_open = irlan_eth_open, + .ndo_stop = irlan_eth_close, + .ndo_start_xmit = irlan_eth_xmit, .ndo_set_rx_mode = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -110,7 +110,7 @@ static int irlan_eth_open(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Ready to play! 
*/ netif_stop_queue(dev); /* Wait until data link is ready */ @@ -137,7 +137,7 @@ static int irlan_eth_close(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Stop device */ netif_stop_queue(dev); @@ -310,35 +310,32 @@ static void irlan_eth_set_multicast_list(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Check if data channel has been connected yet */ if (self->client.state != IRLAN_DATA) { - IRDA_DEBUG(1, "%s(), delaying!\n", __func__ ); + IRDA_DEBUG(1, "%s(), delaying!\n", __func__); return; } if (dev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n"); - } - else if ((dev->flags & IFF_ALLMULTI) || + } else if ((dev->flags & IFF_ALLMULTI) || netdev_mc_count(dev) > HW_MAX_ADDRS) { /* Disable promiscuous mode, use normal mode. */ - IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); + IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__); /* hardware_set_filter(NULL); */ irlan_set_multicast_filter(self, TRUE); - } - else if (!netdev_mc_empty(dev)) { - IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); + } else if (!netdev_mc_empty(dev)) { + IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__); /* Walk the address list, and load the filter */ /* hardware_set_filter(dev->mc_list); */ irlan_set_multicast_filter(self, TRUE); - } - else { - IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__ ); + } else { + IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__); irlan_set_multicast_filter(self, FALSE); } -- cgit v1.2.3 From 21d1196a35f5686c4323e42a62fdb4b23b0ab4a3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jul 2013 20:03:19 -0700 Subject: ipv4: set transport header earlier commit 45f00f99d6e ("ipv4: tcp: clean up tcp_v4_early_demux()") added a 
performance regression for non GRO traffic, basically disabling IP early demux. IPv6 stack resets transport header in ip6_rcv() before calling IP early demux in ip6_rcv_finish(), while IPv4 does this only in ip_local_deliver_finish(), _after_ IP early demux. GRO traffic happened to enable IP early demux because transport header is also set in inet_gro_receive() Instead of reverting the faulty commit, we can make IPv4/IPv6 behave the same : transport_header should be set in ip_rcv() instead of ip_local_deliver_finish() ip_local_deliver_finish() can also use skb_network_header_len() which is faster than ip_hdrlen() Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3da817b89e9..15e3e683ade 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -190,10 +190,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); - __skb_pull(skb, ip_hdrlen(skb)); - - /* Point into the IP datagram, just past the header. */ - skb_reset_transport_header(skb); + __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); { @@ -437,6 +434,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + skb->transport_header = skb->network_header + iph->ihl*4; + /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); -- cgit v1.2.3 From ae8e9c5a1a7889315229a741fd48a5dd0bc2964c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 16 Jul 2013 17:09:15 -0700 Subject: net: Fix sysfs_format_mac() code duplication. It's just a duplicate implementation of "%*phC". Thanks to Joe Perches for showing that we had exactly this support in the lib/vsprintf.c code already. Signed-off-by: David S. 
Miller --- net/ethernet/eth.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5359560926b..be1f64d3535 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -401,27 +401,8 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, } EXPORT_SYMBOL(alloc_etherdev_mqs); -static size_t _format_mac_addr(char *buf, int buflen, - const unsigned char *addr, int len) -{ - int i; - char *cp = buf; - - for (i = 0; i < len; i++) { - cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]); - if (i == len - 1) - break; - cp += scnprintf(cp, buflen - (cp - buf), ":"); - } - return cp - buf; -} - ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) { - size_t l; - - l = _format_mac_addr(buf, PAGE_SIZE, addr, len); - l += scnprintf(buf + l, PAGE_SIZE - l, "\n"); - return (ssize_t)l; + return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); -- cgit v1.2.3 From 87f40dd6ce7042caca0b3b557e8923127f51f902 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 16 Jul 2013 08:52:30 +0200 Subject: pkt_sched: sch_qfq: remove a source of high packet delay/jitter QFQ+ inherits from QFQ a design choice that may cause a high packet delay/jitter and a severe short-term unfairness. As QFQ, QFQ+ uses a special quantity, the system virtual time, to track the service provided by the ideal system it approximates. When a packet is dequeued, this quantity must be incremented by the size of the packet, divided by the sum of the weights of the aggregates waiting to be served. Tracking this sum correctly is a non-trivial task, because, to preserve tight service guarantees, the decrement of this sum must be delayed in a special way [1]: this sum can be decremented only after that its value would decrease also in the ideal system approximated by QFQ+. 
For efficiency, QFQ+ keeps track only of the 'instantaneous' weight sum, increased and decreased immediately as the weight of an aggregate changes, and as an aggregate is created or destroyed (which, in its turn, happens as a consequence of some class being created/destroyed/changed). However, to avoid the problems caused to service guarantees by these immediate decreases, QFQ+ increments the system virtual time using the maximum value allowed for the weight sum, 2^10, in place of the dynamic, instantaneous value. The instantaneous value of the weight sum is used only to check whether a request of weight increase or a class creation can be satisfied. Unfortunately, the problems caused by this choice are worse than the temporary degradation of the service guarantees that may occur, when a class is changed or destroyed, if the instantaneous value of the weight sum was used to update the system virtual time. In fact, the fraction of the link bandwidth guaranteed by QFQ+ to each aggregate is equal to the ratio between the weight of the aggregate and the sum of the weights of the competing aggregates. The packet delay guaranteed to the aggregate is instead inversely proportional to the guaranteed bandwidth. By using the maximum possible value, and not the actual value of the weight sum, QFQ+ provides each aggregate with the worst possible service guarantees, and not with service guarantees related to the actual set of competing aggregates. To see the consequences of this fact, consider the following simple example. Suppose that only the following aggregates are backlogged, i.e., that only the classes in the following aggregates have packets to transmit: one aggregate with weight 10, say A, and ten aggregates with weight 1, say B1, B2, ..., B10. In particular, suppose that these aggregates are always backlogged. Given the weight distribution, the smoothest and fairest service order would be: A B1 A B2 A B3 A B4 A B5 A B6 A B7 A B8 A B9 A B10 A B1 A B2 ... 
QFQ+ would provide exactly this optimal service if it used the actual value for the weight sum instead of the maximum possible value, i.e., 11 instead of 2^10. In contrast, since QFQ+ uses the latter value, it serves aggregates as follows (easy to prove and to reproduce experimentally): A B1 B2 B3 B4 B5 B6 B7 B8 B9 B10 A A A A A A A A A A B1 B2 ... B10 A A ... By replacing 10 with N in the above example, and by increasing N, one can increase at will the maximum packet delay and the jitter experienced by the classes in aggregate A. This patch addresses this issue by just using the above 'instantaneous' value of the weight sum, instead of the maximum possible value, when updating the system virtual time. After the instantaneous weight sum is decreased, QFQ+ may deviate from the ideal service for a time interval on the order of the time to serve one maximum-size packet for each backlogged class. The worst-case extent of the deviation exhibited by QFQ+ during this time interval [1] is basically the same as that of the deviation described above (but, without this patch, QFQ+ suffers from such a deviation all the time). Finally, this patch modifies the comment to the function qfq_slot_insert, to make it coherent with the fact that the weight sum used by QFQ+ can now be lower than the maximum possible value. [1] P. Valente, "Extending WF2Q+ to support a dynamic traffic mix", Proceedings of AAA-IDEA'05, June 2005. Signed-off-by: Paolo Valente Signed-off-by: David S.
Miller --- net/sched/sch_qfq.c | 85 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index a7ab323849b..8056fb4e618 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -113,7 +113,6 @@ #define FRAC_BITS 30 /* fixed point arithmetic */ #define ONE_FP (1UL << FRAC_BITS) -#define IWSUM (ONE_FP/QFQ_MAX_WSUM) #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ @@ -189,6 +188,7 @@ struct qfq_sched { struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ u32 num_active_agg; /* Num. of active aggregates */ u32 wsum; /* weight sum */ + u32 iwsum; /* inverse weight sum */ unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ @@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, q->wsum += (int) agg->class_weight * (new_num_classes - agg->num_classes); + q->iwsum = ONE_FP / q->wsum; agg->num_classes = new_num_classes; } @@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { if (!hlist_unhashed(&agg->nonfull_next)) hlist_del_init(&agg->nonfull_next); + q->wsum -= agg->class_weight; + if (q->wsum != 0) + q->iwsum = ONE_FP / q->wsum; + if (q->in_serv_agg == agg) q->in_serv_agg = qfq_choose_next_agg(q); kfree(agg); @@ -834,38 +839,60 @@ static void qfq_make_eligible(struct qfq_sched *q) } } - /* - * The index of the slot in which the aggregate is to be inserted must - * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' - * because the start time of the group may be moved backward by one - * slot after the aggregate has been inserted, and this would cause - * non-empty slots to be right-shifted by one position. 
+ * The index of the slot in which the input aggregate agg is to be + * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2' + * and not a '-1' because the start time of the group may be moved + * backward by one slot after the aggregate has been inserted, and + * this would cause non-empty slots to be right-shifted by one + * position. + * + * QFQ+ fully satisfies this bound to the slot index if the parameters + * of the classes are not changed dynamically, and if QFQ+ never + * happens to postpone the service of agg unjustly, i.e., it never + * happens that the aggregate becomes backlogged and eligible, or just + * eligible, while an aggregate with a higher approximated finish time + * is being served. In particular, in this case QFQ+ guarantees that + * the timestamps of agg are low enough that the slot index is never + * higher than 2. Unfortunately, QFQ+ cannot provide the same + * guarantee if it happens to unjustly postpone the service of agg, or + * if the parameters of some class are changed. + * + * As for the first event, i.e., an out-of-order service, the + * upper bound to the slot index guaranteed by QFQ+ grows to + * 2 + + * QFQ_MAX_AGG_CLASSES * ((1<budget -= len; - q->V += (u64)len * IWSUM; + q->V += (u64)len * q->iwsum; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, (unsigned long long) q->V); -- cgit v1.2.3 From d4b812dea4a236f729526facf97df1a9d18e191c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 07:19:26 -0700 Subject: vlan: mask vlan prio bits In commit 48cc32d38a52d0b68f91a171a8d00531edc6a46e ("vlan: don't deliver frames for unknown vlans to protocols") Florian made sure we set pkt_type to PACKET_OTHERHOST if the vlan id is set and we could find a vlan device for this particular id. But we also have a problem if prio bits are set. 
Steinar reported an issue on a router receiving IPv6 frames with a vlan tag of 4000 (id 0, prio 2), and tunneled into a sit device, because skb->vlan_tci is set. Forwarded frame is completely corrupted : We can see (8100:4000) being inserted in the middle of IPv6 source address : 16:48:00.780413 IP6 2001:16d8:8100:4000:ee1c:0:9d9:bc87 > 9f94:4d95:2001:67c:29f4::: ICMP6, unknown icmp6 type (0), length 64 0x0000: 0000 0029 8000 c7c3 7103 0001 a0ae e651 0x0010: 0000 0000 ccce 0b00 0000 0000 1011 1213 0x0020: 1415 1617 1819 1a1b 1c1d 1e1f 2021 2223 0x0030: 2425 2627 2829 2a2b 2c2d 2e2f 3031 3233 It seems we are not really ready to properly cope with this right now. We can probably do better in future kernels : vlan_get_ingress_priority() should be a netdev property instead of a per vlan_dev one. For stable kernels, lets clear vlan_tci to fix the bugs. Reported-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 2 +- net/core/dev.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 8a15eaadc4b..4a78c4de9f2 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; + u16 vlan_id = vlan_tx_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/core/dev.c b/net/core/dev.c index a3d8d44cb7f..26755dd40da 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3580,8 +3580,15 @@ ncls: } } - if (vlan_tx_nonzero_tag_present(skb)) - skb->pkt_type = PACKET_OTHERHOST; + if (unlikely(vlan_tx_tag_present(skb))) { + if (vlan_tx_tag_get_id(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* Note: we might in the future use prio bits + * and set skb->priority like in vlan_do_receive() + * For the time being, just ignore 
Priority Code Point + */ + skb->vlan_tci = 0; + } /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; -- cgit v1.2.3 From 3e3aac497513c669e1c62c71e1d552ea85c1d974 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 09:35:10 -0700 Subject: vlan: fix a race in egress prio management egress_priority_map[] hash table updates are protected by rtnl, and we never remove elements until the device is dismantled. We have to make sure that before inserting a new element in the hash table, all its fields are committed to memory or else another cpu could find corrupt values and crash. Signed-off-by: Eric Dumazet Cc: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3a8c8fd63c8..1cd3d2a406f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; + smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ + mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; while (mp) { if (mp->priority == skb->priority) { @@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, np->next = mp; np->priority = skb_prio; np->vlan_qos = vlan_qos; + /* Before inserting this element in hash table, make sure all its fields + * are committed to memory.
+ * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() + */ + smp_wmb(); vlan->egress_priority_map[skb_prio & 0xF] = np; if (vlan_qos) vlan->nr_egress_mappings++; -- cgit v1.2.3 From 7427b370e0aa6226c763af94fc5c4e3433383543 Mon Sep 17 00:00:00 2001 From: Frederic Danis Date: Thu, 20 Jun 2013 11:11:04 +0200 Subject: NFC: Fix NCI over SPI build kbuild test robot found following error: net/built-in.o: In function `nci_spi_send': >> spi.c:(.text+0x19a76f): undefined reference to `crc_ccitt' Add CRC_CCITT module to Kconfig to fix it Reported-by: kbuild test robot. Signed-off-by: Frederic Danis Signed-off-by: Samuel Ortiz --- net/nfc/nci/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index 2a2416080b4..a4f1e42e348 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -11,6 +11,7 @@ config NFC_NCI config NFC_NCI_SPI depends on NFC_NCI && SPI + select CRC_CCITT bool "NCI over SPI protocol support" default n help -- cgit v1.2.3 From 651e92716aaae60fc41b9652f54cb6803896e0da Mon Sep 17 00:00:00 2001 From: Michal Tesar Date: Fri, 19 Jul 2013 14:09:01 +0200 Subject: sysctl net: Keep tcp_syn_retries inside the boundary Limit the min/max value passed to the /proc/sys/net/ipv4/tcp_syn_retries. Signed-off-by: Michal Tesar Signed-off-by: David S. 
Miller --- net/ipv4/sysctl_net_ipv4.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b2c123c44d6..610e324348d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,6 +36,8 @@ static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; static int ip_ttl_min = 1; static int ip_ttl_max = 255; +static int tcp_syn_retries_min = 1; +static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; @@ -332,7 +334,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", -- cgit v1.2.3 From 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jul 2013 20:07:16 -0700 Subject: bridge: do not call setup_timer() multiple times commit 9f00b2e7cf24 ("bridge: only expire the mdb entry when query is received") added a nasty bug as an active timer can be reinitialized. setup_timer() must be done once, no matter how many times mod_timer() is called. br_multicast_new_group() is the right place to do this. Reported-by: Srivatsa S. Bhat Diagnosed-by: Thomas Gleixner Signed-off-by: Eric Dumazet Tested-by: Srivatsa S. Bhat Cc: Cong Wang Signed-off-by: David S.
Miller --- net/bridge/br_multicast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 69af490cce4..4b99c9a2704 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -619,6 +619,9 @@ rehash: mp->br = br; mp->addr = *group; + setup_timer(&mp->timer, br_multicast_group_expired, + (unsigned long)mp); + hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -1126,7 +1129,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!mp) goto out; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); mod_timer(&mp->timer, now + br->multicast_membership_interval); mp->timer_armed = true; @@ -1204,7 +1206,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!mp) goto out; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); mod_timer(&mp->timer, now + br->multicast_membership_interval); mp->timer_armed = true; -- cgit v1.2.3 From 40d18ff959fe8b847be4f7b03f84644a7c18211e Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Fri, 19 Jul 2013 17:37:39 +0800 Subject: mac80211: prevent the buffering or frame transmission to non-assoc mesh STA This patch is intended to avoid buffering frames for a non-assoc mesh STA and also to avoid triggering frame transmission to a non-assoc mesh STA, which could cause a kernel panic on specific hw. For example, a kernel panic happens in ath9k if user space inserts the mesh STA and does not proceed with the SAE and AMPE, and later the same mesh STA is detected again. The sta_state of the mesh STA remains at IEEE80211_STA_NONE, and if ieee80211_sta_ps_deliver_wakeup is called and subsequently ath_tx_aggr_wakeup, the kernel panics because ath_tx_node_init has not been called beforehand to initialize the required data structures. This issue was reported by Cedric Voncken before.
http://www.spinics.net/lists/linux-wireless/msg106342.html [<831ea6b4>] ath_tx_aggr_wakeup+0x44/0xcc [ath9k] [<83084214>] ieee80211_sta_ps_deliver_wakeup+0xb8/0x208 [mac80211] [<830b9824>] ieee80211_mps_sta_status_update+0x94/0x108 [mac80211] [<83099398>] ieee80211_sta_ps_transition+0xc94/0x34d8 [mac80211] [<8022399c>] nf_iterate+0x98/0x104 [<8309bb60>] ieee80211_sta_ps_transition+0x345c/0x34d8 [mac80211] Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/mesh_ps.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 3b7bfc01ee3..22290a929b9 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -229,6 +229,10 @@ void ieee80211_mps_sta_status_update(struct sta_info *sta) enum nl80211_mesh_power_mode pm; bool do_buffer; + /* For non-assoc STA, prevent buffering or frame transmission */ + if (sta->sta_state < IEEE80211_STA_ASSOC) + return; + /* * use peer-specific power mode if peering is established and the * peer's power mode is known -- cgit v1.2.3 From cd34f647a78e7f2296fcb72392b9e5c832793e65 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 23 Jul 2013 13:56:50 +0200 Subject: mac80211: fix monitor interface suspend crash regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My commit: commit 12e7f517029dad819c45eca9ca01fdb9ba57616b Author: Stanislaw Gruszka Date: Thu Feb 28 10:55:26 2013 +0100 mac80211: cleanup generic suspend/resume procedures removed check for deleting MONITOR and AP_VLAN when suspend. That can cause a crash (i.e. in iwlagn_mac_remove_interface()) since we remove interface in the driver that we did not add before. 
Reference: http://marc.info/?l=linux-kernel&m=137391815113860&w=2 Bisected-by: Ortwin Glück Reported-and-tested-by: Ortwin Glück Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7fc5d0d8149..34012620434 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -99,10 +99,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } mutex_unlock(&local->sta_mtx); - /* remove all interfaces */ + /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata) || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; + drv_remove_interface(local, sdata); } -- cgit v1.2.3 From f585a991e1d1612265f0d4e812f77e40dd54975b Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Mon, 22 Jul 2013 12:01:58 -0700 Subject: fib_trie: potential out of bounds access in trie_show_stats() With the <= max condition in the for loop, it will always go 1 element further than needed. If the condition for the while loop is never met, then max is MAX_STAT_DEPTH, and the for loop will walk off the end of nodesizes[]. Signed-off-by: Jerry Snitselaar Acked-by: Hannes Frederic Sowa Signed-off-by: David S.
Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed934..108a1e9c9ea 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2133,7 +2133,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) max--; pointers = 0; - for (i = 1; i <= max; i++) + for (i = 1; i < max; i++) if (stat->nodesizes[i] != 0) { seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); pointers += (1<<i) * stat->nodesizes[i]; -- cgit v1.2.3 From 905a6f96a1b18e490a75f810d733ced93c39b0e5 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 22 Jul 2013 23:45:53 +0200 Subject: ipv6: take rtnl_lock and mark mrt6 table as freed on namespace cleanup Otherwise we end up dereferencing the already freed net->ipv6.mrt6 pointer which leads to a panic (from Srivatsa S. Bhat): BUG: unable to handle kernel paging request at ffff882018552020 IP: [] ip6mr_sk_done+0x32/0xb0 [ipv6] PGD 290a067 PUD 207ffe0067 PMD 207ff1d067 PTE 8000002018552060 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: ebtable_nat ebtables nfs fscache nf_conntrack_ipv4 nf_defrag_ipv4 ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables nfsd lockd nfs_acl exportfs auth_rpcgss autofs4 sunrpc 8021q garp bridge stp llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter +ip6_tables ipv6 vfat fat vhost_net macvtap macvlan vhost tun kvm_intel kvm uinput iTCO_wdt iTCO_vendor_support cdc_ether usbnet mii microcode i2c_i801 i2c_core lpc_ich mfd_core shpchp ioatdma dca mlx4_core be2net wmi acpi_cpufreq mperf ext4 jbd2 mbcache dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 7 Comm: kworker/u33:0 Not tainted 3.11.0-rc1-ea45e-a #4 Hardware name: IBM -[8737R2A]-/00Y2738, BIOS -[B2E120RUS-1.20]- 11/30/2012 Workqueue: netns cleanup_net task: ffff8810393641c0 ti: ffff881039366000 task.ti: ffff881039366000 RIP: 0010:[] [] ip6mr_sk_done+0x32/0xb0 [ipv6] RSP:
0018:ffff881039367bd8 EFLAGS: 00010286 RAX: ffff881039367fd8 RBX: ffff882018552000 RCX: dead000000200200 RDX: 0000000000000000 RSI: ffff881039367b68 RDI: ffff881039367b68 RBP: ffff881039367bf8 R08: ffff881039367b68 R09: 2222222222222222 R10: 2222222222222222 R11: 2222222222222222 R12: ffff882015a7a040 R13: ffff882014eb89c0 R14: ffff8820289e2800 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88103fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff882018552020 CR3: 0000000001c0b000 CR4: 00000000000407f0 Stack: ffff881039367c18 ffff882014eb89c0 ffff882015e28c00 0000000000000000 ffff881039367c18 ffffffffa034d9d1 ffff8820289e2800 ffff882014eb89c0 ffff881039367c58 ffffffff815bdecb ffffffff815bddf2 ffff882014eb89c0 Call Trace: [] rawv6_close+0x21/0x40 [ipv6] [] inet_release+0xfb/0x220 [] ? inet_release+0x22/0x220 [] inet6_release+0x3f/0x50 [ipv6] [] sock_release+0x29/0xa0 [] sk_release_kernel+0x30/0x70 [] icmpv6_sk_exit+0x3b/0x80 [ipv6] [] ops_exit_list+0x39/0x60 [] cleanup_net+0xfb/0x1a0 [] process_one_work+0x1da/0x610 [] ? process_one_work+0x169/0x610 [] worker_thread+0x120/0x3a0 [] ? process_one_work+0x610/0x610 [] kthread+0xee/0x100 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x70/0x70 Code: 20 48 89 5d e8 4c 89 65 f0 4c 89 6d f8 66 66 66 66 90 4c 8b 67 30 49 89 fd e8 db 3c 1e e1 49 8b 9c 24 90 08 00 00 48 85 db 74 06 <4c> 39 6b 20 74 20 bb f3 ff ff ff e8 8e 3c 1e e1 89 d8 4c 8b 65 RIP [] ip6mr_sk_done+0x32/0xb0 [ipv6] RSP CR2: ffff882018552020 Reported-by: Srivatsa S. Bhat Tested-by: Srivatsa S. Bhat Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 583e8d435f9..03986d31fa4 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -259,10 +259,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } + rtnl_unlock(); fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else @@ -289,7 +291,10 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { + rtnl_lock(); ip6mr_free_table(net->ipv6.mrt6); + net->ipv6.mrt6 = NULL; + rtnl_unlock(); } #endif -- cgit v1.2.3 From deceb4c062a8dd63fe554c3be2b4bf9151a5cedf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 23 Jul 2013 20:22:39 +0100 Subject: net: fix comment above build_skb() build_skb() specifies that the data parameter must come from a kmalloc'd area; this is only true if frag_size equals 0, because then build_skb() will use ksize(data) to figure out the actual data size. Update the comment to reflect that special condition. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 20e02d2605e..3df4d4ccf44 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -309,7 +309,8 @@ EXPORT_SYMBOL(__alloc_skb); * @frag_size: size of fragment, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and - * skb_shared_info. @data must have been allocated by kmalloc() + * skb_shared_info. @data must have been allocated by kmalloc() only if + * @frag_size is 0, otherwise data should come from the page allocator. * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed.
* Notes : -- cgit v1.2.3 From 23df0b731954502a9391e739b92927cee4360343 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 21 Jul 2013 16:36:48 +0300 Subject: regulatory: use correct regulatory initiator on wiphy register The current regdomain was not always set by the core. This causes cards with a custom regulatory domain to ignore user initiated changes if done before the card was registered. Signed-off-by: Arik Nemtsov Acked-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- net/wireless/reg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5a950f36bae..de06d5d1287 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2247,10 +2247,13 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) void wiphy_regulatory_register(struct wiphy *wiphy) { + struct regulatory_request *lr; + if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; - wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); + lr = get_last_request(); + wiphy_update_regulatory(wiphy, lr->initiator); } void wiphy_regulatory_deregister(struct wiphy *wiphy) -- cgit v1.2.3 From da9910ac4a816b4340944c78d94c02a35527db46 Mon Sep 17 00:00:00 2001 From: Jaganath Kanakkassery Date: Fri, 21 Jun 2013 19:55:11 +0530 Subject: Bluetooth: Fix invalid length check in l2cap_information_rsp() The length check is invalid since the length varies with type of info response. This was introduced by the commit cb3b3152b2f5939d67005cff841a1ca748b19888 Because of this, l2cap info rsp is not handled and command reject is sent. 
> ACL data: handle 11 flags 0x02 dlen 16 L2CAP(s): Info rsp: type 2 result 0 Extended feature mask 0x00b8 Enhanced Retransmission mode Streaming mode FCS Option Fixed Channels < ACL data: handle 11 flags 0x00 dlen 10 L2CAP(s): Command rej: reason 0 Command not understood Cc: stable@vger.kernel.org Signed-off-by: Jaganath Kanakkassery Signed-off-by: Chan-Yeol Park Acked-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- net/bluetooth/l2cap_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4be6a264b47..68843a28a7a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4333,7 +4333,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data; u16 type, result; - if (cmd_len != sizeof(*rsp)) + if (cmd_len < sizeof(*rsp)) return -EPROTO; type = __le16_to_cpu(rsp->type); -- cgit v1.2.3 From fcee337704d76446e0d4714cc5eff53e896f7c6f Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Thu, 11 Jul 2013 11:34:28 +0100 Subject: Bluetooth: Fix race between hci_register_dev() and hci_dev_open() If hci_dev_open() is called after hci_register_dev() added the device to the hci_dev_list but before the workqueue are created we could run into a NULL pointer dereference (see below). This bug is very unlikely to happen, systems using bluetoothd to manage their bluetooth devices will never see this happen. BUG: unable to handle kernel NULL pointer dereference 0100 IP: [] __queue_work+0x32/0x3d0 (...) Call Trace: [] queue_work_on+0x45/0x50 [] hci_req_run+0xbf/0xf0 [bluetooth] [] ? hci_init2_req+0x720/0x720 [bluetooth] [] __hci_req_sync+0xd6/0x1c0 [bluetooth] [] ? try_to_wake_up+0x2b0/0x2b0 [] ? 
usb_autopm_put_interface+0x30/0x40 [] hci_dev_open+0x275/0x2e0 [bluetooth] [] hci_sock_ioctl+0x1f2/0x3f0 [bluetooth] [] sock_do_ioctl+0x30/0x70 [] sock_ioctl+0x79/0x2f0 [] do_vfs_ioctl+0x96/0x560 [] SyS_ioctl+0x91/0xb0 [] system_call_fastpath+0x1a/0x1f Reported-by: Sedat Dilek Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ace5e55fe5a..64d33d1e14c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2207,10 +2207,6 @@ int hci_register_dev(struct hci_dev *hdev) BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - write_lock(&hci_dev_list_lock); - list_add(&hdev->list, &hci_dev_list); - write_unlock(&hci_dev_list_lock); - hdev->workqueue = alloc_workqueue(hdev->name, WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 1); if (!hdev->workqueue) { @@ -2246,6 +2242,10 @@ int hci_register_dev(struct hci_dev *hdev) if (hdev->dev_type != HCI_AMP) set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + write_lock(&hci_dev_list_lock); + list_add(&hdev->list, &hci_dev_list); + write_unlock(&hci_dev_list_lock); + hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); @@ -2258,9 +2258,6 @@ err_wqueue: destroy_workqueue(hdev->req_workqueue); err: ida_simple_remove(&hci_index_ida, hdev->id); - write_lock(&hci_dev_list_lock); - list_del(&hdev->list); - write_unlock(&hci_dev_list_lock); return error; } -- cgit v1.2.3 From 555445cd11803c6bc93b2be31968f3949ef7708b Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Wed, 24 Jul 2013 10:39:06 +0200 Subject: neigh: prevent overflowing params in /proc/sys/net/ipv4/neigh/ Without this patch, the fields app_solicit, gc_thresh1, gc_thresh2, gc_thresh3, proxy_qlen, ucast_solicit, mcast_solicit could have assumed negative values when setting large numbers. Signed-off-by: Francesco Fusco Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b7de821f98d..9232c68941a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2767,6 +2767,7 @@ EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL static int zero; +static int int_max = INT_MAX; static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(struct ctl_table *ctl, int write, @@ -2819,19 +2820,25 @@ static struct neigh_sysctl_table { .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_UCAST_PROBE] = { .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_APP_PROBE] = { .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_RETRANS_TIME] = { .procname = "retrans_time", @@ -2874,7 +2881,9 @@ static struct neigh_sysctl_table { .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_ANYCAST_DELAY] = { .procname = "anycast_delay", @@ -2916,19 +2925,25 @@ static struct neigh_sysctl_table { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, 
[NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, {}, }, -- cgit v1.2.3 From c74f2b2678f40b80265dd53556f1f778c8e1823f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 26 Jul 2013 11:00:10 +0200 Subject: genetlink: release cb_lock before requesting additional module Requesting external module with cb_lock taken can result in the deadlock like showed below: [ 2458.111347] Showing all locks held in the system: [ 2458.111347] 1 lock held by NetworkManager/582: [ 2458.111347] #0: (cb_lock){++++++}, at: [] genl_rcv+0x19/0x40 [ 2458.111347] 1 lock held by modprobe/603: [ 2458.111347] #0: (cb_lock){++++++}, at: [] genl_lock_all+0x15/0x30 [ 2461.579457] SysRq : Show Blocked State [ 2461.580103] task PC stack pid father [ 2461.580103] NetworkManager D ffff880034b84500 4040 582 1 0x00000080 [ 2461.580103] ffff8800197ff720 0000000000000046 00000000001d5340 ffff8800197fffd8 [ 2461.580103] ffff8800197fffd8 00000000001d5340 ffff880019631700 7fffffffffffffff [ 2461.580103] ffff8800197ff880 ffff8800197ff878 ffff880019631700 ffff880019631700 [ 2461.580103] Call Trace: [ 2461.580103] [] schedule+0x29/0x70 [ 2461.580103] [] schedule_timeout+0x1c1/0x360 [ 2461.580103] [] ? mark_held_locks+0xbb/0x140 [ 2461.580103] [] ? _raw_spin_unlock_irq+0x2c/0x50 [ 2461.580103] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 2461.580103] [] wait_for_completion_killable+0xe8/0x170 [ 2461.580103] [] ? wake_up_state+0x20/0x20 [ 2461.580103] [] call_usermodehelper_exec+0x1a5/0x210 [ 2461.580103] [] ? wait_for_completion_killable+0x3d/0x170 [ 2461.580103] [] __request_module+0x1b3/0x370 [ 2461.580103] [] ? trace_hardirqs_on_caller+0xfd/0x1c0 [ 2461.580103] [] ctrl_getfamily+0x159/0x190 [ 2461.580103] [] genl_family_rcv_msg+0x1f4/0x2e0 [ 2461.580103] [] ? 
genl_family_rcv_msg+0x2e0/0x2e0 [ 2461.580103] [] genl_rcv_msg+0x8e/0xd0 [ 2461.580103] [] netlink_rcv_skb+0xa9/0xc0 [ 2461.580103] [] genl_rcv+0x28/0x40 [ 2461.580103] [] netlink_unicast+0xdd/0x190 [ 2461.580103] [] netlink_sendmsg+0x329/0x750 [ 2461.580103] [] sock_sendmsg+0x99/0xd0 [ 2461.580103] [] ? local_clock+0x5f/0x70 [ 2461.580103] [] ? lock_release_non_nested+0x308/0x350 [ 2461.580103] [] ___sys_sendmsg+0x39e/0x3b0 [ 2461.580103] [] ? kvm_clock_read+0x2f/0x50 [ 2461.580103] [] ? sched_clock+0x9/0x10 [ 2461.580103] [] ? sched_clock_local+0x1d/0x80 [ 2461.580103] [] ? sched_clock_cpu+0xa8/0x100 [ 2461.580103] [] ? trace_hardirqs_off+0xd/0x10 [ 2461.580103] [] ? local_clock+0x5f/0x70 [ 2461.580103] [] ? lock_release_holdtime.part.28+0xf/0x1a0 [ 2461.580103] [] ? fget_light+0xf9/0x510 [ 2461.580103] [] ? fget_light+0x3c/0x510 [ 2461.580103] [] __sys_sendmsg+0x42/0x80 [ 2461.580103] [] SyS_sendmsg+0x12/0x20 [ 2461.580103] [] system_call_fastpath+0x16/0x1b [ 2461.580103] modprobe D ffff88000f2c8000 4632 603 602 0x00000080 [ 2461.580103] ffff88000f04fba8 0000000000000046 00000000001d5340 ffff88000f04ffd8 [ 2461.580103] ffff88000f04ffd8 00000000001d5340 ffff8800377d4500 ffff8800377d4500 [ 2461.580103] ffffffff81d0b260 ffffffff81d0b268 ffffffff00000000 ffffffff81d0b2b0 [ 2461.580103] Call Trace: [ 2461.580103] [] schedule+0x29/0x70 [ 2461.580103] [] rwsem_down_write_failed+0xed/0x1a0 [ 2461.580103] [] ? update_cpu_load_active+0x10/0xb0 [ 2461.580103] [] call_rwsem_down_write_failed+0x13/0x20 [ 2461.580103] [] ? down_write+0x9d/0xb2 [ 2461.580103] [] ? genl_lock_all+0x15/0x30 [ 2461.580103] [] genl_lock_all+0x15/0x30 [ 2461.580103] [] genl_register_family+0x53/0x1f0 [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] genl_register_family_with_ops+0x20/0x80 [ 2461.580103] [] ? 0xffffffffa01dbfff [ 2461.580103] [] nl80211_init+0x24/0xf0 [cfg80211] [ 2461.580103] [] ? 
0xffffffffa01dbfff [ 2461.580103] [] cfg80211_init+0x43/0xdb [cfg80211] [ 2461.580103] [] do_one_initcall+0xfa/0x1b0 [ 2461.580103] [] ? set_memory_nx+0x43/0x50 [ 2461.580103] [] load_module+0x1c6f/0x27f0 [ 2461.580103] [] ? store_uevent+0x40/0x40 [ 2461.580103] [] SyS_finit_module+0x86/0xb0 [ 2461.580103] [] system_call_fastpath+0x16/0x1b [ 2461.580103] Sched Debug Version: v0.10, 3.11.0-0.rc1.git4.1.fc20.x86_64 #1 The problem started to happen after the net-pf-16-proto-16-family-nl80211 alias name was added to the cfg80211 module by the commit below (though that commit itself is perfectly fine): commit fb4e156886ce6e8309e912d8b370d192330d19d3 Author: Marcel Holtmann Date: Sun Apr 28 16:22:06 2013 -0700 nl80211: Add generic netlink module alias for cfg80211/nl80211 Reported-and-tested-by: Jeff Layton Reported-by: Richard W.M. Jones Signed-off-by: Stanislaw Gruszka Reviewed-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2fd6dbea327..1076fe16b12 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -877,8 +877,10 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_MODULES if (res == NULL) { genl_unlock(); + up_read(&cb_lock); request_module("net-pf-%d-proto-%d-family-%s", PF_NETLINK, NETLINK_GENERIC, name); + down_read(&cb_lock); genl_lock(); res = genl_family_find_byname(name); } -- cgit v1.2.3 From 3f8e2d75c14660abc8b69206f30190ab93304379 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 24 Jul 2013 02:32:46 +0300 Subject: Bluetooth: Fix HCI init for BlueFRITZ! devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit None of the BlueFRITZ! devices with manufacturer ID 31 (AVM Berlin) support HCI_Read_Local_Supported_Commands. It is safe to use the manufacturer ID (instead of e.g.
a USB ID specific quirk) because the company never created any newer controllers. < HCI Command: Read Local Supported Comm.. (0x04|0x0002) plen 0 [hci0] 0.210014 > HCI Event: Command Status (0x0f) plen 4 [hci0] 0.217361 Read Local Supported Commands (0x04|0x0002) ncmd 1 Status: Unknown HCI Command (0x01) Reported-by: Jörg Esser Signed-off-by: Johan Hedberg Tested-by: Jörg Esser Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 64d33d1e14c..0176f200ccb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -513,7 +513,10 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - if (hdev->hci_ver > BLUETOOTH_VER_1_1) + /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read + * local supported commands HCI command. + */ + if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { -- cgit v1.2.3 From 53e21fbc288218a423959f878c86471a0e323a9a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 27 Jul 2013 14:11:14 -0500 Subject: Bluetooth: Fix calling request callback more than once In certain circumstances, such as an HCI driver using __hci_cmd_sync_ev with HCI_EV_CMD_COMPLETE as the expected completion event there is the chance that hci_event_packet will call hci_req_cmd_complete twice (once for the explicitly looked after event and another time in the actual handler of cmd_complete). In the case of __hci_cmd_sync_ev this introduces a race where the first call wakes up the blocking __hci_cmd_sync_ev and lets it complete. 
However, by the time that a second __hci_cmd_sync_ev call is already in progress the second hci_req_cmd_complete call (from the previous operation) will wake up the blocking function prematurely and cause it to fail, as witnessed by the following log: [ 639.232195] hci_rx_work: hci0 Event packet [ 639.232201] hci_req_cmd_complete: opcode 0xfc8e status 0x00 [ 639.232205] hci_sent_cmd_data: hci0 opcode 0xfc8e [ 639.232210] hci_req_sync_complete: hci0 result 0x00 [ 639.232220] hci_cmd_complete_evt: hci0 opcode 0xfc8e [ 639.232225] hci_req_cmd_complete: opcode 0xfc8e status 0x00 [ 639.232228] __hci_cmd_sync_ev: hci0 end: err 0 [ 639.232234] __hci_cmd_sync_ev: hci0 [ 639.232238] hci_req_add_ev: hci0 opcode 0xfc8e plen 250 [ 639.232242] hci_prepare_cmd: skb len 253 [ 639.232246] hci_req_run: length 1 [ 639.232250] hci_sent_cmd_data: hci0 opcode 0xfc8e [ 639.232255] hci_req_sync_complete: hci0 result 0x00 [ 639.232266] hci_cmd_work: hci0 cmd_cnt 1 cmd queued 1 [ 639.232271] __hci_cmd_sync_ev: hci0 end: err 0 [ 639.232276] Bluetooth: hci0 sending Intel patch command (0xfc8e) failed (-61) Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0176f200ccb..48e1e0438f3 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3442,8 +3442,16 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) */ if (hdev->sent_cmd) { req_complete = bt_cb(hdev->sent_cmd)->req.complete; - if (req_complete) + + if (req_complete) { + /* We must set the complete callback to NULL to + * avoid calling the callback more than once if + * this function gets called again. 
+ */ + bt_cb(hdev->sent_cmd)->req.complete = NULL; + goto call_complete; + } } /* Remove all pending commands belonging to this request */ -- cgit v1.2.3 From a0db856a95a29efb1c23db55c02d9f0ff4f0db48 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Jul 2013 00:16:21 -0700 Subject: net_sched: Fix stack info leak in cbq_dump_wrr(). Make sure the reserved fields, and padding (if any), are fully initialized. Based upon a patch by Dan Carpenter and feedback from Joe Perches. Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 71a56886255..7a42c81a19e 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1465,6 +1465,7 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; + memset(&opt, 0, sizeof(opt)); opt.flags = 0; opt.allot = cl->allot; opt.priority = cl->priority + 1; -- cgit v1.2.3 From 9ea7187c53f63e31f2d1b2b1e474e31808565009 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 31 Jul 2013 01:19:43 +0200 Subject: NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD Loading a firmware into a target is typically called firmware download, not firmware upload. So we rename the netlink API to NFC_CMD_FW_DOWNLOAD in order to avoid any terminology confusion from userspace. 
Signed-off-by: Samuel Ortiz --- net/nfc/core.c | 20 ++++++++++---------- net/nfc/hci/core.c | 8 ++++---- net/nfc/netlink.c | 12 ++++++------ net/nfc/nfc.h | 6 +++--- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index dc96a83aa6a..1d074dd1650 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -44,7 +44,7 @@ DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); -int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) { int rc = 0; @@ -62,28 +62,28 @@ int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name) goto error; } - if (!dev->ops->fw_upload) { + if (!dev->ops->fw_download) { rc = -EOPNOTSUPP; goto error; } - dev->fw_upload_in_progress = true; - rc = dev->ops->fw_upload(dev, firmware_name); + dev->fw_download_in_progress = true; + rc = dev->ops->fw_download(dev, firmware_name); if (rc) - dev->fw_upload_in_progress = false; + dev->fw_download_in_progress = false; error: device_unlock(&dev->dev); return rc; } -int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name) { - dev->fw_upload_in_progress = false; + dev->fw_download_in_progress = false; - return nfc_genl_fw_upload_done(dev, firmware_name); + return nfc_genl_fw_download_done(dev, firmware_name); } -EXPORT_SYMBOL(nfc_fw_upload_done); +EXPORT_SYMBOL(nfc_fw_download_done); /** * nfc_dev_up - turn on the NFC device @@ -110,7 +110,7 @@ int nfc_dev_up(struct nfc_dev *dev) goto error; } - if (dev->fw_upload_in_progress) { + if (dev->fw_download_in_progress) { rc = -EBUSY; goto error; } diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 7b1c186736e..fe66908401f 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -809,14 +809,14 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) } } 
-static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name) +static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (!hdev->ops->fw_upload) + if (!hdev->ops->fw_download) return -ENOTSUPP; - return hdev->ops->fw_upload(hdev, firmware_name); + return hdev->ops->fw_download(hdev, firmware_name); } static struct nfc_ops hci_nfc_ops = { @@ -831,7 +831,7 @@ static struct nfc_ops hci_nfc_ops = { .im_transceive = hci_transceive, .tm_send = hci_tm_send, .check_presence = hci_check_presence, - .fw_upload = hci_fw_upload, + .fw_download = hci_fw_download, .discover_se = hci_discover_se, .enable_se = hci_enable_se, .disable_se = hci_disable_se, diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index b05ad909778..f16fd59d416 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1089,7 +1089,7 @@ exit: return rc; } -static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) +static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; int rc; @@ -1108,13 +1108,13 @@ static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info) nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], sizeof(firmware_name)); - rc = nfc_fw_upload(dev, firmware_name); + rc = nfc_fw_download(dev, firmware_name); nfc_put_device(dev); return rc; } -int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name) { struct sk_buff *msg; void *hdr; @@ -1124,7 +1124,7 @@ int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, - NFC_CMD_FW_UPLOAD); + NFC_CMD_FW_DOWNLOAD); if (!hdr) goto free_msg; @@ -1251,8 +1251,8 @@ static struct genl_ops nfc_genl_ops[] = { .policy = nfc_genl_policy, }, { - .cmd = NFC_CMD_FW_UPLOAD, - .doit = nfc_genl_fw_upload, + .cmd 
= NFC_CMD_FW_DOWNLOAD, + .doit = nfc_genl_fw_download, .policy = nfc_genl_policy, }, { diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index ee85a1fc1b2..820a7850c36 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -123,10 +123,10 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter) class_dev_iter_exit(iter); } -int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name); -int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name); +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name); -int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name); +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name); int nfc_dev_up(struct nfc_dev *dev); -- cgit v1.2.3 From ff862a4668dd6dba962b1d2d8bd344afa6375683 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 28 Jul 2013 23:04:45 +0300 Subject: af_key: more info leaks in pfkey messages This is inspired by a5cc68f3d6 "af_key: fix info leaks in notify messages". There are some struct members which don't get initialized and could disclose small amounts of private information. Acked-by: Mathias Krause Signed-off-by: Dan Carpenter Acked-by: Steffen Klassert Signed-off-by: David S. 
Miller --- net/key/af_key.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 9da862070dd..ab8bd2cabfa 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2081,6 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; @@ -3137,7 +3138,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; + pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ if (x->id.proto == IPPROTO_AH) @@ -3525,6 +3528,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; -- cgit v1.2.3 From e1ee3673a83cc02b6b5e43c9e647d8dd5e1c4e26 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Mon, 29 Jul 2013 12:30:04 +0200 Subject: genetlink: fix usage of NLM_F_EXCL or NLM_F_REPLACE Currently, it is not possible to use either NLM_F_EXCL or NLM_F_REPLACE from genetlink. This is due to this checking in genl_family_rcv_msg: if (nlh->nlmsg_flags & NLM_F_DUMP) NLM_F_DUMP is NLM_F_MATCH|NLM_F_ROOT. Thus, if the NLM_F_EXCL or NLM_F_REPLACE flag is set, genetlink believes that you're requesting a dump and it calls the .dumpit callback.
The solution that I propose is to refine this checking to make it stricter: if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) And given the combination NLM_F_REPLACE and NLM_F_EXCL does not make sense to me, it removes the ambiguity. There was a patch that tried to fix this some time ago (0ab03c2 netlink: test for all flags of the NLM_F_DUMP composite) but it tried to resolve this ambiguity in *all* existing netlink subsystems, not only genetlink. That patch was reverted since it broke iproute2, which is using NLM_F_ROOT to request the dump of the routing cache. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1076fe16b12..512718adb0d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -571,7 +571,7 @@ static int genl_family_rcv_msg(struct genl_family *family, !capable(CAP_NET_ADMIN)) return -EPERM; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ops->dumpit, .done = ops->done, -- cgit v1.2.3 From 8cb3b9c3642c0263d48f31d525bcee7170eedc20 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Jul 2013 13:23:39 +0300 Subject: net_sched: info leak in atm_tc_dump_class() The "pvc" struct has a hole after pvc.sap_family which is not cleared. Signed-off-by: Dan Carpenter Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ca8e0a57d94..1f9c31411f1 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -605,6 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, struct sockaddr_atmpvc pvc; int state; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = flow->vcc->dev ? 
flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; -- cgit v1.2.3 From b00589af3b04736376f24625ab0b394642e89e29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 1 Aug 2013 01:06:20 +0200 Subject: bridge: disable snooping if there is no querier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is no querier on a link then we won't get periodic reports and therefore won't be able to learn about multicast listeners behind ports, potentially leading to lost multicast packets, especially for multicast listeners that joined before the creation of the bridge. These lost multicast packets can appear since c5c23260594 ("bridge: Add multicast_querier toggle and disable queries by default") in particular. With this patch we are flooding multicast packets if our querier is disabled and if we didn't detect any other querier. A grace period of the Maximum Response Delay of the querier is added to give multicast responses enough time to arrive and to be learned from before disabling the flooding behaviour again. Signed-off-by: Linus Lüssing Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 3 ++- net/bridge/br_input.c | 3 ++- net/bridge/br_multicast.c | 39 ++++++++++++++++++++++++++++++--------- net/bridge/br_private.h | 12 ++++++++++++ 4 files changed, 46 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 2ef66781fed..69363bd37f6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -70,7 +70,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) } mdst = br_mdb_get(br, skb, vid); - if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br)) br_multicast_deliver(mdst, skb); else br_flood_deliver(br, skb, false); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 1b8b8b824cd..8c561c0aa63 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -101,7 +101,8 @@ int br_handle_frame_finish(struct sk_buff *skb) unicast = false; } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); - if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br)) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4b99c9a2704..61c5e819380 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1014,6 +1014,16 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif +static void br_multicast_update_querier_timer(struct net_bridge *br, + unsigned long max_delay) +{ + if (!timer_pending(&br->multicast_querier_timer)) + br->multicast_querier_delay_time = jiffies + max_delay; + + mod_timer(&br->multicast_querier_timer, + jiffies + br->multicast_querier_interval); +} + /* * Add port to router_list * list is maintained ordered by pointer value @@ -1064,11 +1074,11 @@ timer: static void br_multicast_query_received(struct 
net_bridge *br, struct net_bridge_port *port, - int saddr) + int saddr, + unsigned long max_delay) { if (saddr) - mod_timer(&br->multicast_querier_timer, - jiffies + br->multicast_querier_interval); + br_multicast_update_querier_timer(br, max_delay); else if (timer_pending(&br->multicast_querier_timer)) return; @@ -1096,8 +1106,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, !!iph->saddr); - group = ih->group; if (skb->len == sizeof(*ih)) { @@ -1121,6 +1129,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } + br_multicast_query_received(br, port, !!iph->saddr, max_delay); + if (!group) goto out; @@ -1176,8 +1186,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); - if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1198,6 +1206,9 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = mld2q->mld2q_mrc ? 
MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; } + br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr), + max_delay); + if (!group) goto out; @@ -1643,6 +1654,8 @@ void br_multicast_init(struct net_bridge *br) br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; + br->multicast_querier_delay_time = 0; + spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); @@ -1831,6 +1844,8 @@ unlock: int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { + unsigned long max_delay; + val = !!val; spin_lock_bh(&br->multicast_lock); @@ -1838,8 +1853,14 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) goto unlock; br->multicast_querier = val; - if (val) - br_multicast_start_querier(br); + if (!val) + goto unlock; + + max_delay = br->multicast_query_response_interval; + if (!timer_pending(&br->multicast_querier_timer)) + br->multicast_querier_delay_time = jiffies + max_delay; + + br_multicast_start_querier(br); unlock: spin_unlock_bh(&br->multicast_lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3be89b3ce17..2f7da41851b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -267,6 +267,7 @@ struct net_bridge unsigned long multicast_query_interval; unsigned long multicast_query_response_interval; unsigned long multicast_startup_query_interval; + unsigned long multicast_querier_delay_time; spinlock_t multicast_lock; struct net_bridge_mdb_htable __rcu *mdb; @@ -501,6 +502,13 @@ static inline bool br_multicast_is_router(struct net_bridge *br) (br->multicast_router == 1 && timer_pending(&br->multicast_router_timer)); } + +static inline bool br_multicast_querier_exists(struct net_bridge *br) +{ + return time_is_before_jiffies(br->multicast_querier_delay_time) && + (br->multicast_querier || + timer_pending(&br->multicast_querier_timer)); +} #else static inline int br_multicast_rcv(struct net_bridge 
*br, struct net_bridge_port *port, @@ -557,6 +565,10 @@ static inline bool br_multicast_is_router(struct net_bridge *br) { return 0; } +static inline bool br_multicast_querier_exists(struct net_bridge *br) +{ + return false; +} static inline void br_mdb_init(void) { } -- cgit v1.2.3 From 447383d2ba6061bb069da45f95f223a01bba61dd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Jul 2013 11:30:23 +1000 Subject: NFSD/sunrpc: avoid deadlock on TCP connection due to memory pressure. Since we enabled auto-tuning for sunrpc TCP connections we do not guarantee that there is enough write-space on each connection to queue a reply. If memory pressure causes the window to shrink too small, the request throttling in sunrpc/svc will not accept any requests so no more requests will be handled. Even when pressure decreases the window will not grow again until data is sent on the connection. This means we get a deadlock: no requests will be handled until there is more space, and no space will be allocated until a request is handled. This can be simulated by modifying svc_tcp_has_wspace to inflate the number of bytes required and removing the 'svc_sock_setbufsize' calls in svc_setup_socket. I found that multiplying by 16 was enough to make the requirement exceed the default allocation. With this modification in place: mount -o vers=3,proto=tcp 127.0.0.1:/home /mnt would block and eventually time out because the nfs server could not accept any requests. This patch relaxes the request throttling to always allow at least one request through per connection. It does this by checking that both sk_stream_min_wspace() and xprt->xpt_reserved are zero. The first is zero when the TCP transmit queue is empty. The second is zero when there are no RPC requests being processed. When both of these are zero the socket is idle and so one more request can safely be allowed through. Applying this patch allows the above mount command to succeed cleanly.
Tracing shows that the allocated write buffer space quickly grows and after a few requests are handled, the extra tests are no longer needed to permit further requests to be processed. The main purpose of request throttling is to handle the case when one client is slow at collecting replies and the send queue gets full of replies that the client hasn't acknowledged (at the TCP level) yet. As we only change behaviour when the send queue is empty this main purpose is still preserved. Reported-by: Ben Myers Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 305374d4fb9..7762b9f8a8b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1193,7 +1193,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) return 1; required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; - if (sk_stream_wspace(svsk->sk_sk) >= required) + if (sk_stream_wspace(svsk->sk_sk) >= required || + (sk_stream_min_wspace(svsk->sk_sk) == 0 && + atomic_read(&xprt->xpt_reserved) == 0)) return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); return 0; -- cgit v1.2.3 From 9f96392b0ae6aefc02a9b900c3f4889dfafc8402 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 10 Jun 2013 16:06:44 -0400 Subject: svcrpc: fix gss_rpc_upcall create error Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index d304f41260f..1e1ccf539fa 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -120,7 +120,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL gssproxy " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); *_clnt = NULL; goto out; } -- cgit v1.2.3 From dc43376c26cef74226174a2394f37f2a3f8a8639 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 7 Jun 2013 10:11:19 -0400 Subject: svcrpc: fix gss-proxy xdr decoding oops Uninitialized stack data was being used as the destination for memcpy's. Longer term we'll just delete some of this code; all we're doing is skipping over xdr that we don't care about. Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_xdr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 357f613df7f..3c85d1c8a02 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -430,7 +430,7 @@ static int dummy_enc_nameattr_array(struct xdr_stream *xdr, static int dummy_dec_nameattr_array(struct xdr_stream *xdr, struct gssx_name_attr_array *naa) { - struct gssx_name_attr dummy; + struct gssx_name_attr dummy = { .attr = {.len = 0} }; u32 count, i; __be32 *p; @@ -493,12 +493,13 @@ static int gssx_enc_name(struct xdr_stream *xdr, return err; } + static int gssx_dec_name(struct xdr_stream *xdr, struct gssx_name *name) { - struct xdr_netobj dummy_netobj; - struct gssx_name_attr_array dummy_name_attr_array; - struct gssx_option_array dummy_option_array; + struct xdr_netobj dummy_netobj = { .len = 0 }; + struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 }; + struct gssx_option_array dummy_option_array = { .count = 0 }; int err; /* name->display_name */ -- cgit v1.2.3 From 743e217129f69aab074abe520a464fd0c6b1cca1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 31 Jul 2013 14:11:14 -0400 Subject: svcrpc: fix kfree oops in gss-proxy code mech_oid.data is an array, not kmalloc()'d memory. Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 1e1ccf539fa..af7ffd447fe 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -328,7 +328,6 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data) kfree(data->in_handle.data); kfree(data->out_handle.data); kfree(data->out_token.data); - kfree(data->mech_oid.data); free_svc_cred(&data->creds); } -- cgit v1.2.3 From 7193bd17ea92c4c89016c304362c9be93ce50050 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 31 Jul 2013 17:51:42 -0400 Subject: svcrpc: set cr_gss_mech from gss-proxy as well as legacy upcall The change made to rsc_parse() in 0dc1531aca7fd1440918bd55844a054e9c29acad "svcrpc: store gss mech in svc_cred" should also have been propagated to the gss-proxy codepath. This fixes a crash in the gss-proxy case. Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d0347d148b3..09fb638bcaa 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1180,6 +1180,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, gm = gss_mech_get_by_OID(&ud->mech_oid); if (!gm) goto out; + rsci.cred.cr_gss_mech = gm; status = -EINVAL; /* mech-specific data: */ @@ -1195,7 +1196,6 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, rscp = rsc_update(cd, &rsci, rscp); status = 0; out: - gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); -- cgit v1.2.3 From 2ac3ac8f86f2fe065d746d9a9abaca867adec577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Thu, 1 Aug 2013 10:04:14 +0200 Subject: ipv6: prevent fib6_run_gc() contention On a high-traffic router with many processors and many IPv6 dst entries, soft lockup in fib6_run_gc() can occur when number of entries reaches gc_thresh. This happens because fib6_run_gc() uses fib6_gc_lock to allow only one thread to run the garbage collector but ip6_dst_gc() doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() returns. On a system with many entries, this can take some time so that in the meantime, other threads pass the tests in ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for the lock. They then have to run the garbage collector one after another which blocks them for quite long. Resolve this by replacing special value ~0UL of expire parameter to fib6_run_gc() by explicit "force" parameter to choose between spin_lock_bh() and spin_trylock_bh() and call fib6_run_gc() with force=false if gc_thresh is reached but not max_size. Signed-off-by: Michal Kubecek Signed-off-by: David S. 
Miller --- net/ipv6/ip6_fib.c | 19 ++++++++----------- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 4 ++-- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5fc9c7a68d8..d872553ca93 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1632,19 +1632,16 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long expires, struct net *net) +void fib6_run_gc(unsigned long expires, struct net *net, bool force) { - if (expires != ~0UL) { + if (force) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = expires ? (int)expires : - net->ipv6.sysctl.ip6_rt_gc_interval; - } else { - if (!spin_trylock_bh(&fib6_gc_lock)) { - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); - return; - } - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; + } else if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); + return; } + gc_args.timeout = expires ? 
(int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = icmp6_dst_gc(); @@ -1661,7 +1658,7 @@ void fib6_run_gc(unsigned long expires, struct net *net) static void fib6_gc_timer_cb(unsigned long arg) { - fib6_run_gc(0, (struct net *)arg); + fib6_run_gc(0, (struct net *)arg, true); } static int __net_init fib6_net_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 24c03396e00..79aa9652ed8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1576,7 +1576,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); idev = in6_dev_get(dev); if (!idev) break; @@ -1586,7 +1586,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a8c891aa246..824c424f964 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1326,7 +1326,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) @@ -2827,7 +2827,7 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); + fib6_run_gc(delay <= 0 ? 
0 : (unsigned long)delay, net, delay > 0); return 0; } -- cgit v1.2.3 From 49a18d86f66d33a20144ecb5a34bba0d1856b260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Thu, 1 Aug 2013 10:04:24 +0200 Subject: ipv6: update ip6_rt_last_gc every time GC is run As pointed out by Eric Dumazet, net->ipv6.ip6_rt_last_gc should hold the last time garbage collector was run so that we should update it whenever fib6_run_gc() calls fib6_clean_all(), not only if we got there from ip6_dst_gc(). Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 6 +++++- net/ipv6/route.c | 4 +--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d872553ca93..bff3d821c7e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1634,6 +1634,8 @@ static DEFINE_SPINLOCK(fib6_gc_lock); void fib6_run_gc(unsigned long expires, struct net *net, bool force) { + unsigned long now; + if (force) { spin_lock_bh(&fib6_gc_lock); } else if (!spin_trylock_bh(&fib6_gc_lock)) { @@ -1646,10 +1648,12 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); fib6_clean_all(net, fib6_age, 0, NULL); + now = jiffies; + net->ipv6.ip6_rt_last_gc = now; if (gc_args.more) mod_timer(&net->ipv6.ip6_fib_timer, - round_jiffies(jiffies + round_jiffies(now + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 824c424f964..b70f8979003 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1311,7 +1311,6 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), static int ip6_dst_gc(struct dst_ops *ops) { - unsigned long now = jiffies; struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; @@ -1321,13 +1320,12 @@ static int 
ip6_dst_gc(struct dst_ops *ops) int entries; entries = dst_entries_get_fast(ops); - if (time_after(rt_last_gc + rt_min_interval, now) && + if (time_after(rt_last_gc + rt_min_interval, jiffies) && entries <= rt_max_size) goto out; net->ipv6.ip6_rt_gc_expire++; fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); - net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; -- cgit v1.2.3 From 3f8f52982ad020f0704548c46de66bf464d3b967 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Aug 2013 10:41:27 +0200 Subject: ipv6: move peer_addr init into ipv6_add_addr() Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cfdcf7b2daf..a0ce957fb67 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -813,7 +813,8 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * -ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, +ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, + const struct in6_addr *peer_addr, int pfxlen, int scope, u32 flags) { struct inet6_ifaddr *ifa = NULL; @@ -863,6 +864,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, } ifa->addr = *addr; + if (peer_addr) + ifa->peer_addr = *peer_addr; spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); @@ -1123,8 +1126,8 @@ retry: ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? 
- ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_scope(&addr), - addr_flags) : NULL; + ipv6_add_addr(idev, &addr, NULL, tmp_plen, + ipv6_addr_scope(&addr), addr_flags) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -2179,7 +2182,8 @@ ok: */ if (!max_addresses || ipv6_count_addresses(in6_dev) < max_addresses) - ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, + ifp = ipv6_add_addr(in6_dev, &addr, NULL, + pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags); @@ -2455,15 +2459,13 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p prefered_lft = timeout; } - ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = jiffies; - if (peer_pfx) - ifp->peer_addr = *peer_pfx; spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, @@ -2557,7 +2559,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT); + ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -2683,7 +2685,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); -- cgit v1.2.3 From 8a226b2cfa776db6011fc84b71578513161cd3d3 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 1 Aug 2013 10:41:28 +0200 Subject: ipv6: prevent race between address creation and removal There's a race in IPv6 automatic address assignment. 
The address is created with zero lifetime when it's added to various address lists. Before it gets assigned the correct lifetime, there's a window where a new address may be configured. This causes the semi-initiated address to be deleted in addrconf_verify. This was discovered as a reference leak caused by concurrent run of __ipv6_ifa_notify for both RTM_NEWADDR and RTM_DELADDR with the same address. Fix this by setting the lifetime before the address is added to inet6_addr_lst. A few notes: 1. In addrconf_prefix_rcv, by setting update_lft to zero, the if (update_lft) { ... } condition is no longer executed for newly created addresses. This is okay, as the ifp fields are set in ipv6_add_addr now and ipv6_ifa_notify is called (and has been called) through addrconf_dad_start. 2. The removal of the whole block under ifp->lock in inet6_addr_add is okay, too, as tstamp is initialized to jiffies in ipv6_add_addr. Signed-off-by: Jiri Benc Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a0ce957fb67..da4241c8c7d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -815,7 +815,7 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, - int scope, u32 flags) + int scope, u32 flags, u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; @@ -875,6 +875,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->scope = scope; ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; + ifa->valid_lft = valid_lft; + ifa->prefered_lft = prefered_lft; ifa->cstamp = ifa->tstamp = jiffies; ifa->tokenized = false; @@ -1127,7 +1129,8 @@ retry: ift = !max_addresses || 
ipv6_count_addresses(idev) < max_addresses ? ipv6_add_addr(idev, &addr, NULL, tmp_plen, - ipv6_addr_scope(&addr), addr_flags) : NULL; + ipv6_addr_scope(&addr), addr_flags, + tmp_valid_lft, tmp_prefered_lft) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -1139,8 +1142,6 @@ retry: spin_lock_bh(&ift->lock); ift->ifpub = ifp; - ift->valid_lft = tmp_valid_lft; - ift->prefered_lft = tmp_prefered_lft; ift->cstamp = now; ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); @@ -2185,14 +2186,16 @@ ok: ifp = ipv6_add_addr(in6_dev, &addr, NULL, pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, - addr_flags); + addr_flags, valid_lft, + prefered_lft); if (IS_ERR_OR_NULL(ifp)) { in6_dev_put(in6_dev); return; } - update_lft = create = 1; + update_lft = 0; + create = 1; ifp->cstamp = jiffies; ifp->tokenized = tokenized; addrconf_dad_start(ifp); @@ -2213,7 +2216,7 @@ ok: stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!update_lft && stored_lft) { + if (!update_lft && !create && stored_lft) { if (valid_lft > MIN_VALID_LIFETIME || valid_lft > stored_lft) update_lft = 1; @@ -2459,15 +2462,10 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p prefered_lft = timeout; } - ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags); + ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, + valid_lft, prefered_lft); if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->valid_lft = valid_lft; - ifp->prefered_lft = prefered_lft; - ifp->tstamp = jiffies; - spin_unlock_bh(&ifp->lock); - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, expires, flags); /* @@ -2559,7 +2557,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT); + ifp = ipv6_add_addr(idev, addr, NULL, plen, + scope, IFA_F_PERMANENT, 0, 0); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); 
ifp->flags &= ~IFA_F_TENTATIVE; @@ -2685,7 +2684,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); -- cgit v1.2.3 From e0d1095ae3405404d247afb00233ef837d58da83 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:25 +0800 Subject: net: rename CONFIG_NET_LL_RX_POLL to CONFIG_NET_RX_BUSY_POLL Eliezer renames several *ll_poll to *busy_poll, but forgets CONFIG_NET_LL_RX_POLL, so in case of confusion, rename it too. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/Kconfig | 2 +- net/core/skbuff.c | 2 +- net/core/sock.c | 6 +++--- net/core/sysctl_net_core.c | 2 +- net/socket.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 37702491abe..2b406608a1a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -244,7 +244,7 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis -config NET_LL_RX_POLL +config NET_RX_BUSY_POLL boolean default y diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3df4d4ccf44..2c3d0f53d19 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -740,7 +740,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_copy_secmark(new, old); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL new->napi_id = old->napi_id; #endif } diff --git a/net/core/sock.c b/net/core/sock.c index 548d716c5f6..2c097c5a35d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -900,7 +900,7 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case 
SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) @@ -1170,7 +1170,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: v.val = sk->sk_ll_usec; break; @@ -2292,7 +2292,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; sk->sk_ll_usec = sysctl_net_busy_read; #endif diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 66096861663..b59b6804fd9 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -298,7 +298,7 @@ static struct ctl_table net_core_table[] = { .proc_handler = flow_limit_table_len_sysctl }, #endif /* CONFIG_NET_FLOW_LIMIT */ -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL { .procname = "busy_poll", .data = &sysctl_net_busy_poll, diff --git a/net/socket.c b/net/socket.c index 829b460acb8..b2d7c629eeb 100644 --- a/net/socket.c +++ b/net/socket.c @@ -106,7 +106,7 @@ #include #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; #endif -- cgit v1.2.3 From c756891a4e1c08c43780e17aca1d2b849ef31d1a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 1 Aug 2013 08:29:18 -0400 Subject: tipc: fix oops when creating server socket fails When creation of TIPC internal server socket fails, we get an oops with the following dump: BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] tipc_close_conn+0x59/0xb0 [tipc] PGD 13719067 PUD 12008067 PMD 0 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: tipc(+) CPU: 4 PID: 4340 Comm: insmod Not tainted 3.10.0+ #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 task: 
ffff880014360000 ti: ffff88001374c000 task.ti: ffff88001374c000 RIP: 0010:[] [] tipc_close_conn+0x59/0xb0 [tipc] RSP: 0018:ffff88001374dc98 EFLAGS: 00010292 RAX: 0000000000000000 RBX: ffff880012ac09d8 RCX: 0000000000000000 RDX: 0000000000000046 RSI: 0000000000000001 RDI: ffff880014360000 RBP: ffff88001374dcb8 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa0016fa0 R13: ffffffffa0017010 R14: ffffffffa0017010 R15: ffff880012ac09d8 FS: 0000000000000000(0000) GS:ffff880016600000(0063) knlGS:00000000f76668d0 CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b CR2: 0000000000000020 CR3: 0000000012227000 CR4: 00000000000006e0 Stack: ffff88001374dcb8 ffffffffa0016fa0 0000000000000000 0000000000000001 ffff88001374dcf8 ffffffffa0012922 ffff88001374dce8 00000000ffffffea ffffffffa0017100 0000000000000000 ffff8800134241a8 ffffffffa0017150 Call Trace: [] tipc_server_stop+0xa2/0x1b0 [tipc] [] tipc_subscr_stop+0x15/0x20 [tipc] [] tipc_core_stop+0x1d/0x33 [tipc] [] tipc_init+0xd4/0xf8 [tipc] [] ? 0xffffffffa001efff [] do_one_initcall+0x3f/0x150 [] ? __blocking_notifier_call_chain+0x7d/0xd0 [] load_module+0x11aa/0x19c0 [] ? show_initstate+0x50/0x50 [] ? retint_restore_args+0xe/0xe [] SyS_init_module+0xd9/0x110 [] sysenter_dispatch+0x7/0x1f Code: 6c 24 70 4c 89 ef e8 b7 04 8f e1 8b 73 04 4c 89 e7 e8 7c 9e 32 e1 41 83 ac 24 b8 00 00 00 01 4c 89 ef e8 eb 0a 8f e1 48 8b 43 08 <4c> 8b 68 20 4d 8d a5 48 03 00 00 4c 89 e7 e8 04 05 8f e1 4c 89 RIP [] tipc_close_conn+0x59/0xb0 [tipc] RSP CR2: 0000000000000020 ---[ end trace b02321f40e4269a3 ]--- We have the following call chain: tipc_core_start() ret = tipc_subscr_start() ret = tipc_server_start(){ server->enabled = 1; ret = tipc_open_listening_sock() } I.e., the server->enabled flag is unconditionally set to 1, whatever the return value of tipc_open_listening_sock(). 
This causes a crash when tipc_core_start() tries to clean up resources after a failed initialization: if (ret == failed) tipc_subscr_stop() tipc_server_stop(){ if (server->enabled) tipc_close_conn(){ NULL reference of con->sock-sk OOPS! } } To avoid this, tipc_server_start() should only set server->enabled to 1 in case of a successful socket creation. In case of failure, it should release all allocated resources before returning. Problem introduced in commit c5fa7b3cf3cb22e4ac60485fc2dc187fe012910f ("tipc: introduce new TIPC server infrastructure") in v3.11-rc1. Note that it won't be seen often; it takes a module load under memory constrained conditions in order to trigger the failure condition. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/server.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/server.c b/net/tipc/server.c index 19da5abe0fa..fd3fa57a410 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -355,8 +355,12 @@ static int tipc_open_listening_sock(struct tipc_server *s) return PTR_ERR(con); sock = tipc_create_listen_sock(con); - if (!sock) + if (!sock) { + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + kfree(con); return -EINVAL; + } tipc_register_callbacks(sock, con); return 0; @@ -563,9 +567,14 @@ int tipc_server_start(struct tipc_server *s) kmem_cache_destroy(s->rcvbuf_cache); return ret; } + ret = tipc_open_listening_sock(s); + if (ret < 0) { + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } s->enabled = 1; - - return tipc_open_listening_sock(s); + return ret; } void tipc_server_stop(struct tipc_server *s) -- cgit v1.2.3 From cbd375567f7e4811b1c721f75ec519828ac6583f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 1 Aug 2013 22:32:07 -0700 Subject: htb: fix sign extension bug When userspace passes a large priority value the assignment of the unsigned value 
hopt->prio to signed int cl->prio causes cl->prio to become negative and the comparison with TC_HTB_NUMPRIO is always false. The result is that HTB crashes by referencing outside the array when processing packets. With this patch the large value wraps around like other values outside the normal range. See: https://bugzilla.kernel.org/show_bug.cgi?id=60669 Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c2124ea29f4..45e751527df 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -100,7 +100,7 @@ struct htb_class { struct psched_ratecfg ceil; s64 buffer, cbuffer;/* token bucket depth/rate */ s64 mbuffer; /* max wait time */ - int prio; /* these two are used only by leaves... */ + u32 prio; /* these two are used only by leaves... */ int quantum; /* but stored for parent-to-leaf return */ struct tcf_proto *filter_list; /* class attached filters */ -- cgit v1.2.3 From 446266b0c742a2c9ee8f0dce759a0117bce58a86 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Aug 2013 11:32:43 +0200 Subject: net: rtm_to_ifaddr: free ifa if ifa_cacheinfo processing fails Commit 5c766d642 ("ipv4: introduce address lifetime") leaves the ifa resource that was allocated via inet_alloc_ifa() unfreed when returning from the function with -EINVAL. Thus, free it first via inet_free_ifa(). Signed-off-by: Daniel Borkmann Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 8d48c392adc..34ca6d5a3a4 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -772,7 +772,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -780,6 +780,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } -- cgit v1.2.3 From 5f671d6b4ec3e6d66c2a868738af2cdea09e7509 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 2 Aug 2013 18:36:40 +0400 Subject: net: check net.core.somaxconn sysctl values It's possible to assign an invalid value to the net.core.somaxconn sysctl variable, because there are no checks at all. The sk_max_ack_backlog field of the sock structure is defined as unsigned short. Therefore, the backlog argument in inet_listen() shouldn't exceed USHRT_MAX. The backlog argument in the listen() syscall is truncated to the somaxconn value. So, the somaxconn value shouldn't exceed 65535 (USHRT_MAX). Also, negative values of somaxconn are meaningless. before: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 net.core.somaxconn = 65536 $ sysctl -w net.core.somaxconn=-100 net.core.somaxconn = -100 after: $ sysctl -w net.core.somaxconn=256 net.core.somaxconn = 256 $ sysctl -w net.core.somaxconn=65536 error: "Invalid argument" setting key "net.core.somaxconn" $ sysctl -w net.core.somaxconn=-100 error: "Invalid argument" setting key "net.core.somaxconn" Based on a prior patch from Changli Gao. 
Signed-off-by: Roman Gushchin Reported-by: Changli Gao Suggested-by: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b59b6804fd9..31107abd278 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -21,7 +21,9 @@ #include #include +static int zero = 0; static int one = 1; +static int ushort_max = USHRT_MAX; #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, @@ -339,7 +341,9 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .extra1 = &zero, + .extra2 = &ushort_max, + .proc_handler = proc_dointvec_minmax }, { } }; -- cgit v1.2.3 From 6a8b7f0c85f1f42eb8b6e68ef3d5ba8020d8e272 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 2 Aug 2013 14:45:08 -0400 Subject: netlabel: use domain based selectors when address based selectors are not available NetLabel has the ability to selectively assign network security labels to outbound traffic based on either the LSM's "domain" (different for each LSM), the network destination, or a combination of both. Depending on the type of traffic, local or forwarded, and the type of traffic selector, domain or address based, different hooks are used to label the traffic; the goal being minimal overhead. Unfortunately, there is a bug such that a system using NetLabel domain based traffic selectors does not correctly label outbound local traffic that is not assigned to a socket. The issue is that in these cases the associated NetLabel hook only looks at the address based selectors and not the domain based selectors. This patch corrects this by checking both the domain and address based selectors so that the correct labeling is applied, regardless of the configuration type. 
In order to accomplish this fix, this patch also simplifies some of the NetLabel domainhash structures to use a more common outbound traffic mapping type: struct netlbl_dommap_def. This simplifies some of the code in this patch and paves the way for further simplifications in the future. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_cipso_v4.c | 4 +- net/netlabel/netlabel_domainhash.c | 104 +++++++++++++++++-------------------- net/netlabel/netlabel_domainhash.h | 46 ++++++++-------- net/netlabel/netlabel_kapi.c | 88 +++++++++++++------------------ net/netlabel/netlabel_mgmt.c | 44 ++++++++-------- net/netlabel/netlabel_unlabeled.c | 2 +- 6 files changed, 130 insertions(+), 158 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c15042f987b..a1100640495 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -691,8 +691,8 @@ static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) { struct netlbl_domhsh_walk_arg *cb_arg = arg; - if (entry->type == NETLBL_NLTYPE_CIPSOV4 && - entry->type_def.cipsov4->doi == cb_arg->doi) + if (entry->def.type == NETLBL_NLTYPE_CIPSOV4 && + entry->def.cipso->doi == cb_arg->doi) return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); return 0; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 6bb1d42f0fa..85d842e6e43 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -84,15 +84,15 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) #endif /* IPv6 */ ptr = container_of(entry, struct netlbl_dom_map, rcu); - if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { + if (ptr->def.type == NETLBL_NLTYPE_ADDRSELECT) { netlbl_af4list_foreach_safe(iter4, tmp4, - &ptr->type_def.addrsel->list4) { + &ptr->def.addrsel->list4) { netlbl_af4list_remove_entry(iter4); kfree(netlbl_domhsh_addr4_entry(iter4)); } #if 
IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, - &ptr->type_def.addrsel->list6) { + &ptr->def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); kfree(netlbl_domhsh_addr6_entry(iter6)); } @@ -213,21 +213,21 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, if (addr4 != NULL) { struct netlbl_domaddr4_map *map4; map4 = netlbl_domhsh_addr4_entry(addr4); - type = map4->type; - cipsov4 = map4->type_def.cipsov4; + type = map4->def.type; + cipsov4 = map4->def.cipso; netlbl_af4list_audit_addr(audit_buf, 0, NULL, addr4->addr, addr4->mask); #if IS_ENABLED(CONFIG_IPV6) } else if (addr6 != NULL) { struct netlbl_domaddr6_map *map6; map6 = netlbl_domhsh_addr6_entry(addr6); - type = map6->type; + type = map6->def.type; netlbl_af6list_audit_addr(audit_buf, 0, NULL, &addr6->addr, &addr6->mask); #endif /* IPv6 */ } else { - type = entry->type; - cipsov4 = entry->type_def.cipsov4; + type = entry->def.type; + cipsov4 = entry->def.cipso; } switch (type) { case NETLBL_NLTYPE_UNLABELED: @@ -265,26 +265,25 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) if (entry == NULL) return -EINVAL; - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (entry->type_def.cipsov4 != NULL || - entry->type_def.addrsel != NULL) + if (entry->def.cipso != NULL || entry->def.addrsel != NULL) return -EINVAL; break; case NETLBL_NLTYPE_CIPSOV4: - if (entry->type_def.cipsov4 == NULL) + if (entry->def.cipso == NULL) return -EINVAL; break; case NETLBL_NLTYPE_ADDRSELECT: - netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) { + netlbl_af4list_foreach(iter4, &entry->def.addrsel->list4) { map4 = netlbl_domhsh_addr4_entry(iter4); - switch (map4->type) { + switch (map4->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (map4->type_def.cipsov4 != NULL) + if (map4->def.cipso != NULL) return -EINVAL; break; case NETLBL_NLTYPE_CIPSOV4: - if (map4->type_def.cipsov4 == NULL) + if (map4->def.cipso == NULL) return 
-EINVAL; break; default: @@ -292,9 +291,9 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) } } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) { + netlbl_af6list_foreach(iter6, &entry->def.addrsel->list6) { map6 = netlbl_domhsh_addr6_entry(iter6); - switch (map6->type) { + switch (map6->def.type) { case NETLBL_NLTYPE_UNLABELED: break; default: @@ -402,32 +401,31 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, rcu_assign_pointer(netlbl_domhsh_def, entry); } - if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { + if (entry->def.type == NETLBL_NLTYPE_ADDRSELECT) { netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) + &entry->def.addrsel->list4) netlbl_domhsh_audit_add(entry, iter4, NULL, ret_val, audit_info); #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) + &entry->def.addrsel->list6) netlbl_domhsh_audit_add(entry, NULL, iter6, ret_val, audit_info); #endif /* IPv6 */ } else netlbl_domhsh_audit_add(entry, NULL, NULL, ret_val, audit_info); - } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && - entry->type == NETLBL_NLTYPE_ADDRSELECT) { + } else if (entry_old->def.type == NETLBL_NLTYPE_ADDRSELECT && + entry->def.type == NETLBL_NLTYPE_ADDRSELECT) { struct list_head *old_list4; struct list_head *old_list6; - old_list4 = &entry_old->type_def.addrsel->list4; - old_list6 = &entry_old->type_def.addrsel->list6; + old_list4 = &entry_old->def.addrsel->list4; + old_list6 = &entry_old->def.addrsel->list6; /* we only allow the addition of address selectors if all of * the selectors do not exist in the existing domain map */ - netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) if (netlbl_af4list_search_exact(iter4->addr, iter4->mask, old_list4)) { @@ -435,8 +433,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, goto add_return; } #if 
IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) if (netlbl_af6list_search_exact(&iter6->addr, &iter6->mask, old_list6)) { @@ -446,7 +443,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, #endif /* IPv6 */ netlbl_af4list_foreach_safe(iter4, tmp4, - &entry->type_def.addrsel->list4) { + &entry->def.addrsel->list4) { netlbl_af4list_remove_entry(iter4); iter4->valid = 1; ret_val = netlbl_af4list_add(iter4, old_list4); @@ -457,7 +454,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, - &entry->type_def.addrsel->list6) { + &entry->def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); iter6->valid = 1; ret_val = netlbl_af6list_add(iter6, old_list6); @@ -538,18 +535,18 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, struct netlbl_af4list *iter4; struct netlbl_domaddr4_map *map4; - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) { + &entry->def.addrsel->list4) { map4 = netlbl_domhsh_addr4_entry(iter4); - cipso_v4_doi_putdef(map4->type_def.cipsov4); + cipso_v4_doi_putdef(map4->def.cipso); } /* no need to check the IPv6 list since we currently * support only unlabeled protocols for IPv6 */ break; case NETLBL_NLTYPE_CIPSOV4: - cipso_v4_doi_putdef(entry->type_def.cipsov4); + cipso_v4_doi_putdef(entry->def.cipso); break; } call_rcu(&entry->rcu, netlbl_domhsh_free_entry); @@ -590,20 +587,21 @@ int netlbl_domhsh_remove_af4(const char *domain, entry_map = netlbl_domhsh_search(domain); else entry_map = netlbl_domhsh_search_def(domain); - if (entry_map == NULL || entry_map->type != NETLBL_NLTYPE_ADDRSELECT) + if (entry_map == NULL || + entry_map->def.type != NETLBL_NLTYPE_ADDRSELECT) goto remove_af4_failure; spin_lock(&netlbl_domhsh_lock); entry_addr = 
netlbl_af4list_remove(addr->s_addr, mask->s_addr, - &entry_map->type_def.addrsel->list4); + &entry_map->def.addrsel->list4); spin_unlock(&netlbl_domhsh_lock); if (entry_addr == NULL) goto remove_af4_failure; - netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4) + netlbl_af4list_foreach_rcu(iter4, &entry_map->def.addrsel->list4) goto remove_af4_single_addr; #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6) + netlbl_af6list_foreach_rcu(iter6, &entry_map->def.addrsel->list6) goto remove_af4_single_addr; #endif /* IPv6 */ /* the domain mapping is empty so remove it from the mapping table */ @@ -616,7 +614,7 @@ remove_af4_single_addr: * shouldn't be a problem */ synchronize_rcu(); entry = netlbl_domhsh_addr4_entry(entry_addr); - cipso_v4_doi_putdef(entry->type_def.cipsov4); + cipso_v4_doi_putdef(entry->def.cipso); kfree(entry); return 0; @@ -693,8 +691,8 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) * responsible for ensuring that rcu_read_[un]lock() is called. 
* */ -struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, - __be32 addr) +struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr) { struct netlbl_dom_map *dom_iter; struct netlbl_af4list *addr_iter; @@ -702,15 +700,13 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, dom_iter = netlbl_domhsh_search_def(domain); if (dom_iter == NULL) return NULL; - if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) - return NULL; - addr_iter = netlbl_af4list_search(addr, - &dom_iter->type_def.addrsel->list4); + if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT) + return &dom_iter->def; + addr_iter = netlbl_af4list_search(addr, &dom_iter->def.addrsel->list4); if (addr_iter == NULL) return NULL; - - return netlbl_domhsh_addr4_entry(addr_iter); + return &(netlbl_domhsh_addr4_entry(addr_iter)->def); } #if IS_ENABLED(CONFIG_IPV6) @@ -725,7 +721,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, * responsible for ensuring that rcu_read_[un]lock() is called. 
* */ -struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, +struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain, const struct in6_addr *addr) { struct netlbl_dom_map *dom_iter; @@ -734,15 +730,13 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, dom_iter = netlbl_domhsh_search_def(domain); if (dom_iter == NULL) return NULL; - if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) - return NULL; - addr_iter = netlbl_af6list_search(addr, - &dom_iter->type_def.addrsel->list6); + if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT) + return &dom_iter->def; + addr_iter = netlbl_af6list_search(addr, &dom_iter->def.addrsel->list6); if (addr_iter == NULL) return NULL; - - return netlbl_domhsh_addr6_entry(addr_iter); + return &(netlbl_domhsh_addr6_entry(addr_iter)->def); } #endif /* IPv6 */ diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 90872c4ca30..b9be0eed898 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -43,37 +43,35 @@ #define NETLBL_DOMHSH_BITSIZE 7 /* Domain mapping definition structures */ +struct netlbl_domaddr_map { + struct list_head list4; + struct list_head list6; +}; +struct netlbl_dommap_def { + u32 type; + union { + struct netlbl_domaddr_map *addrsel; + struct cipso_v4_doi *cipso; + }; +}; #define netlbl_domhsh_addr4_entry(iter) \ container_of(iter, struct netlbl_domaddr4_map, list) struct netlbl_domaddr4_map { - u32 type; - union { - struct cipso_v4_doi *cipsov4; - } type_def; + struct netlbl_dommap_def def; struct netlbl_af4list list; }; #define netlbl_domhsh_addr6_entry(iter) \ container_of(iter, struct netlbl_domaddr6_map, list) struct netlbl_domaddr6_map { - u32 type; - - /* NOTE: no 'type_def' union needed at present since we don't currently - * support any IPv6 labeling protocols */ + struct netlbl_dommap_def def; struct netlbl_af6list list; }; -struct netlbl_domaddr_map { - struct list_head list4; - 
struct list_head list6; -}; + struct netlbl_dom_map { char *domain; - u32 type; - union { - struct cipso_v4_doi *cipsov4; - struct netlbl_domaddr_map *addrsel; - } type_def; + struct netlbl_dommap_def def; u32 valid; struct list_head list; @@ -97,16 +95,16 @@ int netlbl_domhsh_remove_af4(const char *domain, int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); -struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, - __be32 addr); +struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr); +#if IS_ENABLED(CONFIG_IPV6) +struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr); +#endif /* IPv6 */ + int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, int (*callback) (struct netlbl_dom_map *entry, void *arg), void *cb_arg); -#if IS_ENABLED(CONFIG_IPV6) -struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, - const struct in6_addr *addr); -#endif /* IPv6 */ - #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 7c94aedd091..96a458e12f6 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -122,7 +122,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, } if (addr == NULL && mask == NULL) - entry->type = NETLBL_NLTYPE_UNLABELED; + entry->def.type = NETLBL_NLTYPE_UNLABELED; else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) @@ -137,7 +137,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map4 = kzalloc(sizeof(*map4), GFP_ATOMIC); if (map4 == NULL) goto cfg_unlbl_map_add_failure; - map4->type = NETLBL_NLTYPE_UNLABELED; + map4->def.type = NETLBL_NLTYPE_UNLABELED; map4->list.addr = addr4->s_addr & mask4->s_addr; map4->list.mask = mask4->s_addr; map4->list.valid = 1; @@ -154,7 
+154,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); if (map6 == NULL) goto cfg_unlbl_map_add_failure; - map6->type = NETLBL_NLTYPE_UNLABELED; + map6->def.type = NETLBL_NLTYPE_UNLABELED; map6->list.addr = *addr6; map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0]; map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1]; @@ -174,8 +174,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain, break; } - entry->type_def.addrsel = addrmap; - entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; goto cfg_unlbl_map_add_failure; @@ -355,8 +355,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, } if (addr == NULL && mask == NULL) { - entry->type_def.cipsov4 = doi_def; - entry->type = NETLBL_NLTYPE_CIPSOV4; + entry->def.cipso = doi_def; + entry->def.type = NETLBL_NLTYPE_CIPSOV4; } else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) @@ -367,8 +367,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); if (addrinfo == NULL) goto out_addrinfo; - addrinfo->type_def.cipsov4 = doi_def; - addrinfo->type = NETLBL_NLTYPE_CIPSOV4; + addrinfo->def.cipso = doi_def; + addrinfo->def.type = NETLBL_NLTYPE_CIPSOV4; addrinfo->list.addr = addr->s_addr & mask->s_addr; addrinfo->list.mask = mask->s_addr; addrinfo->list.valid = 1; @@ -376,8 +376,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, if (ret_val != 0) goto cfg_cipsov4_map_add_failure; - entry->type_def.addrsel = addrmap; - entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; goto out_addrmap; @@ -657,14 +657,14 @@ int netlbl_sock_setattr(struct sock *sk, } switch (family) { case AF_INET: - switch (dom_entry->type) { + switch (dom_entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: ret_val = -EDESTADDRREQ; break; case 
NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - dom_entry->type_def.cipsov4, - secattr); + dom_entry->def.cipso, + secattr); break; case NETLBL_NLTYPE_UNLABELED: ret_val = 0; @@ -754,23 +754,22 @@ int netlbl_conn_setattr(struct sock *sk, { int ret_val; struct sockaddr_in *addr4; - struct netlbl_domaddr4_map *af4_entry; + struct netlbl_dommap_def *entry; rcu_read_lock(); switch (addr->sa_family) { case AF_INET: addr4 = (struct sockaddr_in *)addr; - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - addr4->sin_addr.s_addr); - if (af4_entry == NULL) { + entry = netlbl_domhsh_getentry_af4(secattr->domain, + addr4->sin_addr.s_addr); + if (entry == NULL) { ret_val = -ENOENT; goto conn_setattr_return; } - switch (af4_entry->type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - af4_entry->type_def.cipsov4, - secattr); + entry->cipso, secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now @@ -812,36 +811,21 @@ int netlbl_req_setattr(struct request_sock *req, const struct netlbl_lsm_secattr *secattr) { int ret_val; - struct netlbl_dom_map *dom_entry; - struct netlbl_domaddr4_map *af4_entry; - u32 proto_type; - struct cipso_v4_doi *proto_cv4; + struct netlbl_dommap_def *entry; rcu_read_lock(); - dom_entry = netlbl_domhsh_getentry(secattr->domain); - if (dom_entry == NULL) { - ret_val = -ENOENT; - goto req_setattr_return; - } switch (req->rsk_ops->family) { case AF_INET: - if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) { - struct inet_request_sock *req_inet = inet_rsk(req); - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - req_inet->rmt_addr); - if (af4_entry == NULL) { - ret_val = -ENOENT; - goto req_setattr_return; - } - proto_type = af4_entry->type; - proto_cv4 = af4_entry->type_def.cipsov4; - } else { - proto_type = dom_entry->type; - proto_cv4 = dom_entry->type_def.cipsov4; + entry = netlbl_domhsh_getentry_af4(secattr->domain, + 
inet_rsk(req)->rmt_addr); + if (entry == NULL) { + ret_val = -ENOENT; + goto req_setattr_return; } - switch (proto_type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr); + ret_val = cipso_v4_req_setattr(req, + entry->cipso, secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now @@ -899,23 +883,21 @@ int netlbl_skbuff_setattr(struct sk_buff *skb, { int ret_val; struct iphdr *hdr4; - struct netlbl_domaddr4_map *af4_entry; + struct netlbl_dommap_def *entry; rcu_read_lock(); switch (family) { case AF_INET: hdr4 = ip_hdr(skb); - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - hdr4->daddr); - if (af4_entry == NULL) { + entry = netlbl_domhsh_getentry_af4(secattr->domain,hdr4->daddr); + if (entry == NULL) { ret_val = -ENOENT; goto skbuff_setattr_return; } - switch (af4_entry->type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_skbuff_setattr(skb, - af4_entry->type_def.cipsov4, - secattr); + ret_val = cipso_v4_skbuff_setattr(skb, entry->cipso, + secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index c5384ffc614..dd1c37d7acb 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -104,7 +104,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = -ENOMEM; goto add_failure; } - entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); + entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); @@ -116,12 +116,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); } - /* NOTE: internally we allow/use a entry->type value of + /* NOTE: internally we allow/use a 
entry->def.type value of * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users * to pass that as a protocol value because we need to know the * "real" protocol */ - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: break; case NETLBL_NLTYPE_CIPSOV4: @@ -132,7 +132,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) goto add_failure; - entry->type_def.cipsov4 = cipsov4; + entry->def.cipso = cipsov4; break; default: goto add_failure; @@ -172,9 +172,9 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; map->list.valid = 1; - map->type = entry->type; + map->def.type = entry->def.type; if (cipsov4) - map->type_def.cipsov4 = cipsov4; + map->def.cipso = cipsov4; ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) { @@ -182,8 +182,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info, goto add_failure; } - entry->type = NETLBL_NLTYPE_ADDRSELECT; - entry->type_def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; #if IS_ENABLED(CONFIG_IPV6) } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { struct in6_addr *addr; @@ -223,7 +223,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; map->list.mask = *mask; map->list.valid = 1; - map->type = entry->type; + map->def.type = entry->def.type; ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) { @@ -231,8 +231,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info, goto add_failure; } - entry->type = NETLBL_NLTYPE_ADDRSELECT; - entry->type_def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; #endif /* IPv6 */ } @@ -281,14 +281,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; } - switch 
(entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); if (nla_a == NULL) return -ENOMEM; - netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) { + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) { struct netlbl_domaddr4_map *map4; struct in_addr addr_struct; @@ -310,13 +309,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; map4 = netlbl_domhsh_addr4_entry(iter4); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, - map4->type); + map4->def.type); if (ret_val != 0) return ret_val; - switch (map4->type) { + switch (map4->def.type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, - map4->type_def.cipsov4->doi); + map4->def.cipso->doi); if (ret_val != 0) return ret_val; break; @@ -325,8 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_b); } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) { + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) { struct netlbl_domaddr6_map *map6; nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); @@ -345,7 +343,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; map6 = netlbl_domhsh_addr6_entry(iter6); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, - map6->type); + map6->def.type); if (ret_val != 0) return ret_val; @@ -356,14 +354,14 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_a); break; case NETLBL_NLTYPE_UNLABELED: - ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type); break; case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type); if (ret_val != 0) return ret_val; ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); 
+ entry->def.cipso->doi); break; } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index af3531926ee..8f0897407a2 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1541,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void) entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; - entry->type = NETLBL_NLTYPE_UNLABELED; + entry->def.type = NETLBL_NLTYPE_UNLABELED; ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; -- cgit v1.2.3