Merge pull request #8564 from rameshabhinay/bgp_tcp_mss

bgpd: Support tcp-mss for bgp neighbors
This commit is contained in:
Donatas Abraitis 2021-05-05 13:45:39 +03:00 committed by GitHub
commit 48c2039199
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 513 additions and 2 deletions

View File

@ -442,6 +442,11 @@ static int bgp_accept(struct thread *thread)
if (peer1) {
/* Dynamic neighbor has been created, let it proceed */
peer1->fd = bgp_sock;
/* Set the user configured MSS to TCP socket */
if (CHECK_FLAG(peer1->flags, PEER_FLAG_TCP_MSS))
sockopt_tcp_mss_set(bgp_sock, peer1->tcp_mss);
bgp_fsm_change_status(peer1, Active);
BGP_TIMER_OFF(
peer1->t_start); /* created in peer_create() */
@ -713,6 +718,10 @@ int bgp_connect(struct peer *peer)
set_nonblocking(peer->fd);
/* Set the user configured MSS to TCP socket */
if (CHECK_FLAG(peer->flags, PEER_FLAG_TCP_MSS))
sockopt_tcp_mss_set(peer->fd, peer->tcp_mss);
bgp_socket_set_buffer_size(peer->fd);
if (bgp_set_socket_ttl(peer, peer->fd) < 0)

View File

@ -22,6 +22,7 @@
#include "command.h"
#include "lib/json.h"
#include "lib/sockopt.h"
#include "lib_errors.h"
#include "lib/zclient.h"
#include "lib/printfrr.h"
@ -12594,6 +12595,7 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json,
uint8_t *msg;
json_object *json_neigh = NULL;
time_t epoch_tbuf;
uint32_t sync_tcp_mss;
bgp = p->bgp;
@ -12855,6 +12857,15 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json,
p->v_delayopen * 1000);
}
/* Configured and Synced tcp-mss value for peer */
if (CHECK_FLAG(p->flags, PEER_FLAG_TCP_MSS)) {
sync_tcp_mss = sockopt_tcp_mss_get(p->fd);
json_object_int_add(json_neigh, "bgpTcpMssConfigured",
p->tcp_mss);
json_object_int_add(json_neigh, "bgpTcpMssSynced",
sync_tcp_mss);
}
if (CHECK_FLAG(p->flags, PEER_FLAG_TIMER)) {
json_object_int_add(json_neigh,
"bgpTimerConfiguredHoldTimeMsecs",
@ -12938,6 +12949,13 @@ static void bgp_show_peer(struct vty *vty, struct peer *p, bool use_json,
vty_out(vty,
" Configured DelayOpenTime is %d seconds\n",
p->delayopen);
/* Configured and synced tcp-mss value for peer */
if (CHECK_FLAG(p->flags, PEER_FLAG_TCP_MSS)) {
sync_tcp_mss = sockopt_tcp_mss_get(p->fd);
vty_out(vty, " Configured tcp-mss is %d", p->tcp_mss);
vty_out(vty, ", synced tcp-mss is %d\n", sync_tcp_mss);
}
}
/* Capability. */
if (p->status == Established) {
@ -16357,6 +16375,55 @@ void cli_show_bgp_global_afi_safi_ip_unicast_redistribution_list(
vty_out(vty, "\n");
}
/*
 * CLI helper shared by the "tcp-mss" and "no ... tcp-mss" commands.
 *
 * vty         - terminal the command was entered on
 * peer_str    - neighbor identifier handed to peer_and_group_lookup_vty()
 * tcp_mss_str - decimal MSS value to store, or NULL to remove the setting
 *
 * Returns CMD_WARNING_CONFIG_FAILED when the lookup yields no peer and
 * CMD_SUCCESS otherwise.
 */
static int peer_tcp_mss_vty(struct vty *vty, const char *peer_str,
			    const char *tcp_mss_str)
{
	struct peer *nbr = peer_and_group_lookup_vty(vty, peer_str);

	if (nbr == NULL)
		return CMD_WARNING_CONFIG_FAILED;

	/* No value supplied: drop the configured MSS. */
	if (tcp_mss_str == NULL) {
		peer_tcp_mss_unset(nbr);
		return CMD_SUCCESS;
	}

	peer_tcp_mss_set(nbr, (uint32_t)strtoul(tcp_mss_str, NULL, 10));
	return CMD_SUCCESS;
}
/* "neighbor PEER tcp-mss (1-65535)": store a TCP MSS value for a neighbor. */
DEFUN(neighbor_tcp_mss, neighbor_tcp_mss_cmd,
      "neighbor <A.B.C.D|X:X::X:X|WORD> tcp-mss (1-65535)",
      NEIGHBOR_STR NEIGHBOR_ADDR_STR2
      "TCP max segment size\n"
      "TCP MSS value\n")
{
	/* Token positions within the command string above. */
	const int idx_peer = 1;
	const int idx_mss = 3;
	const char *peer_arg = argv[idx_peer]->arg;
	const char *mss_arg = argv[idx_mss]->arg;

	vty_out(vty,
		" Warning: Reset BGP session for tcp-mss value to take effect\n");

	return peer_tcp_mss_vty(vty, peer_arg, mss_arg);
}
/*
 * "no neighbor PEER tcp-mss [(1-65535)]": remove the TCP MSS setting of a
 * neighbor. The optional value is accepted by the parser but not used.
 */
DEFUN(no_neighbor_tcp_mss, no_neighbor_tcp_mss_cmd,
      "no neighbor <A.B.C.D|X:X::X:X|WORD> tcp-mss [(1-65535)]",
      NO_STR NEIGHBOR_STR NEIGHBOR_ADDR_STR2
      "TCP max segment size\n"
      "TCP MSS value\n")
{
	/* Token position of the neighbor within the command string above. */
	const int idx_peer = 2;
	const char *peer_arg = argv[idx_peer]->arg;

	vty_out(vty,
		" Warning: Reset BGP session for tcp-mss value to take effect\n");

	/* A NULL value string asks the helper to unset the MSS. */
	return peer_tcp_mss_vty(vty, peer_arg, NULL);
}
static void bgp_config_write_redistribute(struct vty *vty, struct bgp *bgp,
afi_t afi, safi_t safi)
{
@ -16806,6 +16873,10 @@ static void bgp_config_write_peer_global(struct vty *vty, struct bgp *bgp,
vty_out(vty, " neighbor %s interface %s\n", addr, peer->ifname);
}
/* TCP max segment size */
if (CHECK_FLAG(peer->flags, PEER_FLAG_TCP_MSS))
vty_out(vty, " neighbor %s tcp-mss %d\n", addr, peer->tcp_mss);
/* passive */
if (peergroup_flag_check(peer, PEER_FLAG_PASSIVE))
vty_out(vty, " neighbor %s passive\n", addr);
@ -19217,6 +19288,10 @@ void bgp_vty_init(void)
install_element(BGP_IPV6_NODE, &af_no_route_map_vpn_imexport_cmd);
install_element(BGP_IPV4_NODE, &af_no_import_vrf_route_map_cmd);
install_element(BGP_IPV6_NODE, &af_no_import_vrf_route_map_cmd);
/* tcp-mss command */
install_element(BGP_NODE, &neighbor_tcp_mss_cmd);
install_element(BGP_NODE, &no_neighbor_tcp_mss_cmd);
}
#include "memory.h"

View File

@ -43,6 +43,7 @@
#include "jhash.h"
#include "table.h"
#include "lib/json.h"
#include "lib/sockopt.h"
#include "frr_pthread.h"
#include "bitfield.h"
@ -1439,6 +1440,8 @@ void peer_xfer_config(struct peer *peer_dst, struct peer *peer_src)
peer_dst->local_as = peer_src->local_as;
peer_dst->port = peer_src->port;
/* copy tcp_mss value */
peer_dst->tcp_mss = peer_src->tcp_mss;
(void)peer_sort(peer_dst);
peer_dst->rmap_type = peer_src->rmap_type;
@ -5116,6 +5119,26 @@ void peer_port_unset(struct peer *peer)
peer->port = BGP_PORT_DEFAULT;
}
/*
 * Store a user-configured TCP MSS in the peer structure and flag the peer
 * as having one configured. No socket is touched here; the stored value
 * only takes effect after a connection reset, when bgp_connect uses it.
 */
void peer_tcp_mss_set(struct peer *peer, uint32_t tcp_mss)
{
	SET_FLAG(peer->flags, PEER_FLAG_TCP_MSS);
	peer->tcp_mss = tcp_mss;
}
/*
 * Clear the TCP MSS configuration of a peer: the stored value goes back
 * to 0 and the PEER_FLAG_TCP_MSS flag is removed. No socket is touched
 * here; the change only takes effect after a connection reset.
 */
void peer_tcp_mss_unset(struct peer *peer)
{
	peer->tcp_mss = 0;
	UNSET_FLAG(peer->flags, PEER_FLAG_TCP_MSS);
}
/*
* Helper function that is called after the name of the policy
* being used by a peer has changed (AF specific). Automatically

View File

@ -1241,6 +1241,8 @@ struct peer {
#define PEER_FLAG_GRACEFUL_RESTART (1U << 24) /* Graceful Restart */
#define PEER_FLAG_GRACEFUL_RESTART_GLOBAL_INHERIT (1U << 25) /* Global-Inherit */
#define PEER_FLAG_RTT_SHUTDOWN (1U << 26) /* shutdown rtt */
#define PEER_FLAG_TIMER_DELAYOPEN (1U << 27) /* delayopen timer */
#define PEER_FLAG_TCP_MSS (1U << 28) /* tcp-mss */
/*
*GR-Disabled mode means unset PEER_FLAG_GRACEFUL_RESTART
@ -1248,8 +1250,6 @@ struct peer {
*and PEER_FLAG_GRACEFUL_RESTART_GLOBAL_INHERIT
*/
#define PEER_FLAG_TIMER_DELAYOPEN (1 << 27) /* delayopen timer */
struct bgp_peer_gr PEER_GR_FSM[BGP_PEER_GR_MODE][BGP_PEER_GR_EVENT_CMD];
enum peer_mode peer_gr_present_state;
/* Non stop forwarding afi-safi count for BGP gr feature*/
@ -1606,6 +1606,9 @@ struct peer {
bool advmap_config_change[AFI_MAX][SAFI_MAX];
bool advmap_table_change;
/* set TCP max segment size */
uint32_t tcp_mss;
QOBJ_FIELDS;
};
DECLARE_QOBJ_TYPE(peer);
@ -2409,4 +2412,6 @@ DECLARE_HOOK(bgp_rpki_prefix_status,
void peer_nsf_stop(struct peer *peer);
void peer_tcp_mss_set(struct peer *peer, uint32_t tcp_mss);
void peer_tcp_mss_unset(struct peer *peer);
#endif /* _QUAGGA_BGPD_H */

View File

@ -3908,6 +3908,147 @@ Example of how to set up a 6-Bone connection.
log file bgpd.log
!
.. _bgp-tcp-mss:
BGP tcp-mss support
===================
TCP provides a mechanism for the user to specify the max segment size.
setsockopt API is used to set the max segment size for TCP session. We
can configure this as part of BGP neighbor configuration.
This document explains how to avoid ICMP vulnerability issues by limiting
TCP max segment size when you are using MTU discovery. Using MTU discovery
on TCP paths is one method of avoiding BGP packet fragmentation.
TCP negotiates a maximum segment size (MSS) value during session connection
establishment between two peers. The MSS value negotiated is primarily based
on the maximum transmission unit (MTU) of the interfaces to which the
communicating peers are directly connected. However, due to variations in
link MTU on the path taken by the TCP packets, some packets in the network
that are well within the MSS value might be fragmented when the packet size
exceeds the link's MTU.
This feature is supported with TCP over IPv4 and TCP over IPv6.
CLI Configuration:
------------------
The configuration below can be done in router bgp mode and allows the user to
configure the tcp-mss value per neighbor. The configuration gets applied
only after a hard reset is performed on that neighbor. If tcp-mss is configured
on both neighbors, then both neighbors need to be reset.
The configuration takes effect based on below rules, so there is a configured
tcp-mss and a synced tcp-mss value per TCP session.
By default, if the configuration is not done, the TCP max segment size is
set to the Maximum Transmission Unit (MTU) - (IP/IP6 header size + TCP header
size + ethernet header). For IPv4 it is MTU - (20 bytes IP header + 20 bytes TCP
header + 12 bytes ethernet header) and for IPv6 it is MTU - (40 bytes IPv6 header
+ 20 bytes TCP header + 12 bytes ethernet header).
If the config is done then it reduces 12-14 bytes for the ether header and
uses it after synchronizing in TCP handshake.
.. clicmd:: neighbor <A.B.C.D|X:X::X:X|WORD> tcp-mss (1-65535)
When tcp-mss is configured kernel reduces 12-14 bytes for ethernet header.
E.g. if tcp-mss is configured as 150 the synced value will be 138.
Note: the configured and synced values differ because the TCP module reduces
the configured value by 12 bytes for the ethernet header.
Running config:
---------------
.. code-block:: frr
frr# show running-config
Building configuration...
Current configuration:
!
router bgp 100
bgp router-id 192.0.2.1
neighbor 198.51.100.2 remote-as 100
neighbor 198.51.100.2 tcp-mss 150 => new entry
neighbor 2001:DB8::2 remote-as 100
neighbor 2001:DB8::2 tcp-mss 400 => new entry
Show command:
-------------
.. code-block:: frr
frr# show bgp neighbors 198.51.100.2
BGP neighbor is 198.51.100.2, remote AS 100, local AS 100, internal link
Hostname: frr
BGP version 4, remote router ID 192.0.2.2, local router ID 192.0.2.1
BGP state = Established, up for 02:15:28
Last read 00:00:28, Last write 00:00:28
Hold time is 180, keepalive interval is 60 seconds
Configured tcp-mss is 150, synced tcp-mss is 138 => new display
.. code-block:: frr
frr# show bgp neighbors 2001:DB8::2
BGP neighbor is 2001:DB8::2, remote AS 100, local AS 100, internal link
Hostname: frr
BGP version 4, remote router ID 192.0.2.2, local router ID 192.0.2.1
BGP state = Established, up for 02:16:34
Last read 00:00:34, Last write 00:00:34
Hold time is 180, keepalive interval is 60 seconds
Configured tcp-mss is 400, synced tcp-mss is 388 => new display
Show command json output:
-------------------------
.. code-block:: frr
frr# show bgp neighbors 2001:DB8::2 json
{
"2001:DB8::2":{
"remoteAs":100,
"localAs":100,
"nbrInternalLink":true,
"hostname":"frr",
"bgpVersion":4,
"remoteRouterId":"192.0.2.2",
"localRouterId":"192.0.2.1",
"bgpState":"Established",
"bgpTimerUpMsec":8349000,
"bgpTimerUpString":"02:19:09",
"bgpTimerUpEstablishedEpoch":1613054251,
"bgpTimerLastRead":9000,
"bgpTimerLastWrite":9000,
"bgpInUpdateElapsedTimeMsecs":8347000,
"bgpTimerHoldTimeMsecs":180000,
"bgpTimerKeepAliveIntervalMsecs":60000,
"bgpTcpMssConfigured":400, => new entry
"bgpTcpMssSynced":388, => new entry
.. code-block:: frr
frr# show bgp neighbors 198.51.100.2 json
{
"198.51.100.2":{
"remoteAs":100,
"localAs":100,
"nbrInternalLink":true,
"hostname":"frr",
"bgpVersion":4,
"remoteRouterId":"192.0.2.2",
"localRouterId":"192.0.2.1",
"bgpState":"Established",
"bgpTimerUpMsec":8370000,
"bgpTimerUpString":"02:19:30",
"bgpTimerUpEstablishedEpoch":1613054251,
"bgpTimerLastRead":30000,
"bgpTimerLastWrite":30000,
"bgpInUpdateElapsedTimeMsecs":8368000,
"bgpTimerHoldTimeMsecs":180000,
"bgpTimerKeepAliveIntervalMsecs":60000,
"bgpTcpMssConfigured":150, => new entry
"bgpTcpMssSynced":138, => new entry
.. include:: routeserver.rst

View File

@ -667,3 +667,39 @@ int sockopt_tcp_signature(int sock, union sockunion *su, const char *password)
{
return sockopt_tcp_signature_ext(sock, su, 0, password);
}
/*
 * Set the TCP maximum segment size (TCP_MAXSEG) option on a socket.
 *
 * sock       - socket to configure
 * tcp_maxseg - MSS value handed to setsockopt()
 *
 * Returns the setsockopt() result: 0 on success, non-zero on failure.
 * A failure is also logged together with the errno text.
 */
int sockopt_tcp_mss_set(int sock, int tcp_maxseg)
{
	int rc = setsockopt(sock, IPPROTO_TCP, TCP_MAXSEG, &tcp_maxseg,
			    sizeof(tcp_maxseg));

	if (rc == 0)
		return 0;

	flog_err_sys(EC_LIB_SYSTEM_CALL, "%s failed: setsockopt(%d): %s",
		     __func__, sock, safe_strerror(errno));
	return rc;
}
/*
 * Read the TCP maximum segment size (TCP_MAXSEG) currently in effect on a
 * socket.
 *
 * sock - socket to query
 *
 * Returns the value reported by getsockopt(), or 0 when no value can be
 * read: either sock is negative or getsockopt() fails. Only a getsockopt()
 * failure is logged.
 */
int sockopt_tcp_mss_get(int sock)
{
	int ret = 0;
	int tcp_maxseg = 0;
	socklen_t tcp_maxseg_len = sizeof(tcp_maxseg);

	/*
	 * A negative descriptor is not a socket. Asking the kernel anyway
	 * would fail and log an error on every call, so report "no value"
	 * quietly instead.
	 */
	if (sock < 0)
		return 0;

	ret = getsockopt(sock, IPPROTO_TCP, TCP_MAXSEG, &tcp_maxseg,
			 &tcp_maxseg_len);
	if (ret != 0) {
		flog_err_sys(EC_LIB_SYSTEM_CALL,
			     "%s failed: getsockopt(%d): %s", __func__, sock,
			     safe_strerror(errno));
		return 0;
	}

	return tcp_maxseg;
}

View File

@ -133,6 +133,26 @@ extern int sockopt_tcp_signature(int sock, union sockunion *su,
extern int sockopt_tcp_signature_ext(int sock, union sockunion *su,
uint16_t prefixlen, const char *password);
/*
* set TCP max segment size. This option allows user to configure
* max segment size for TCP session
*
* sock
* Socket to enable option on.
*
* tcp_maxseg
* value used for TCP segment size negotiation during SYN
*/
extern int sockopt_tcp_mss_set(int sock, int tcp_maxseg);
/*
* get TCP max segment size. This option allows user to get
* the segment size for TCP session
*
* sock
* Socket to get max segment size.
*/
extern int sockopt_tcp_mss_get(int sock);
#ifdef __cplusplus
}
#endif

View File

View File

@ -0,0 +1,6 @@
router bgp 65000
no bgp ebgp-requires-policy
neighbor 192.168.255.2 remote-as 65001
neighbor 192.168.255.2 timers 3 10
exit-address-family
!

View File

@ -0,0 +1,6 @@
!
interface r1-eth0
ip address 192.168.255.1/24
!
ip forwarding
!

View File

@ -0,0 +1,6 @@
router bgp 65001
no bgp ebgp-requires-policy
neighbor 192.168.255.1 remote-as 65000
neighbor 192.168.255.1 timers 3 10
exit-address-family
!

View File

@ -0,0 +1,6 @@
!
interface r2-eth0
ip address 192.168.255.2/24
!
ip forwarding
!

View File

@ -0,0 +1,178 @@
#!/usr/bin/env python
#
# bgp_tcp_mss.py
# Part of NetDEF Topology Tests
#
# Copyright (c) 2021 by
# Abhinay Ramesh <rabhinay@vmware.com>
#
# Permission to use, copy, modify, and/or distribute this software
# for any purpose with or without fee is hereby granted, provided
# that the above copyright notice and this permission notice appear
# in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND NETDEF DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NETDEF BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
#
"""
bgp_tcp_mss.py:
Test if the following commands work:
router bgp 65000
neighbor 192.168.255.2 tcp-mss 500
Need to verify if the tcp-mss value is reflected in the TCP session.
"""
import os
import sys
import json
import time
import pytest
import functools
# add after imports, before defining classes or functions:
pytestmark = [pytest.mark.bgpd]
CWD = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(CWD, "../"))
# pylint: disable=C0413
from lib import topotest
from lib.topogen import Topogen, TopoRouter, get_topogen
from lib.topolog import logger
from mininet.topo import Topo
class TemplateTopo(Topo):
    """Topology with two routers, r1 and r2, linked to one switch s1."""

    def build(self, *_args, **_opts):
        """Add routers r1 and r2, add switch s1, and link both routers to it."""
        tgen = get_topogen(self)

        router_names = ["r{}".format(num) for num in range(1, 3)]
        for name in router_names:
            tgen.add_router(name)

        switch = tgen.add_switch("s1")
        for name in router_names:
            switch.add_link(tgen.gears[name])
def setup_module(mod):
    """Start the topology and bring up every router with its configuration.

    For each router, zebra.conf and bgpd.conf are loaded from the directory
    named after the router next to this file; the routers are started once
    all configurations are loaded.
    """
    tgen = Topogen(TemplateTopo, mod.__name__)
    tgen.start_topology()

    # The position of a router in the collection is not needed, so walk the
    # name/router pairs directly instead of wrapping them in enumerate().
    for rname, router in tgen.routers().items():
        router.load_config(
            TopoRouter.RD_ZEBRA, os.path.join(CWD, "{}/zebra.conf".format(rname))
        )
        router.load_config(
            TopoRouter.RD_BGP, os.path.join(CWD, "{}/bgpd.conf".format(rname))
        )

    tgen.start_router()
def teardown_module(mod):
    """Stop the topology of the current test module."""
    get_topogen().stop_topology()
def test_bgp_tcp_mss():
    """Configure tcp-mss 500 on r1 and r2 and verify the values they report.

    Steps: wait for the session towards 192.168.255.2 to establish on r1,
    configure tcp-mss 500 on each router and clear its BGP sessions, wait
    for the session again, then check that both routers report
    bgpTcpMssConfigured 500 and bgpTcpMssSynced 488 for their neighbor.
    """
    tgen = get_topogen()

    if tgen.routers_have_failure():
        pytest.skip(tgen.errors)

    r1 = tgen.gears["r1"]
    r2 = tgen.gears["r2"]

    def _session_established(router):
        # json_cmp result for "session to 192.168.255.2 is Established".
        reply = json.loads(router.vtysh_cmd("show ip bgp neighbor 192.168.255.2 json"))
        wanted = {
            "192.168.255.2": {
                "bgpState": "Established",
                "addressFamilyInfo": {"ipv4Unicast": {"acceptedPrefixCounter": 0}},
            }
        }
        return topotest.json_cmp(reply, wanted)

    def _configure_tcp_mss(router, as_num, neigh):
        # Enter "router bgp <as_num>" and set tcp-mss 500 on the neighbor.
        command = "\n".join(
            [
                "configure terminal",
                "router bgp {0}",
                "neighbor {1} tcp-mss 500",
            ]
        ).format(as_num, neigh)
        router.vtysh_cmd(command)

    def _clear_sessions(router):
        router.vtysh_cmd("clear bgp *")

    def _tcp_mss_reported(router, neigh):
        # json_cmp result for the configured/synced MSS pair of the neighbor.
        reply = json.loads(router.vtysh_cmd("show bgp neighbor {} json".format(neigh)))
        wanted = {
            str(neigh): {"bgpTcpMssConfigured": 500, "bgpTcpMssSynced": 488}
        }
        return topotest.json_cmp(reply, wanted)

    def _poll(check, attempts):
        # Retry `check` every 0.5s, expecting None; return the last result.
        _, outcome = topotest.run_and_expect(check, None, count=attempts, wait=0.5)
        return outcome

    logger.info("Check if neighbor sessions are up in {}".format(r1.name))
    result = _poll(functools.partial(_session_established, r1), 15)
    assert result is None, 'Failed to see BGP convergence in "{}"'.format(r1.name)

    logger.info("BGP neighbor session is up in {}".format(r1.name))

    logger.info(
        "Configure tcp-mss 500 on {} and reset the session".format(r1.name)
    )
    _configure_tcp_mss(r1, "65000", "192.168.255.2")
    _clear_sessions(r1)

    logger.info(
        "Configure tcp-mss 500 on {} and reset the session".format(r2.name)
    )
    _configure_tcp_mss(r2, "65001", "192.168.255.1")
    _clear_sessions(r2)

    logger.info(
        "Check if neighbor session is up after reset in {}".format(r1.name)
    )
    result = _poll(functools.partial(_session_established, r1), 15)
    assert result is None, 'Failed to see BGP convergence after reset in "{}"'.format(
        r1.name
    )

    logger.info(
        "Verify if TCP MSS value is synced with neighbor in {}".format(r1.name)
    )
    result = _poll(functools.partial(_tcp_mss_reported, r1, "192.168.255.2"), 3)
    assert (
        result is None
    ), 'Failed to sync TCP MSS value over BGP session in "{}"'.format(r1.name)
    logger.info("TCP MSS value is synced with neighbor in {}".format(r1.name))

    logger.info(
        "Verify if TCP MSS value is synced with neighbor in {}".format(r2.name)
    )
    result = _poll(functools.partial(_tcp_mss_reported, r2, "192.168.255.1"), 3)
    assert (
        result is None
    ), 'Failed to sync TCP MSS value over BGP session in "{}"'.format(r2.name)
    logger.info("TCP MSS value is synced with neighbor in {}".format(r2.name))
if __name__ == "__main__":
    # Run directly: pass "-s" plus the command-line arguments to pytest and
    # exit with its return code.
    sys.exit(pytest.main(["-s"] + sys.argv[1:]))