frr/bgpd/bgp_label.c

473 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/* BGP carrying label information
* Copyright (C) 2013 Cumulus Networks, Inc.
*/
#include <zebra.h>
#include "command.h"
#include "event.h"
#include "prefix.h"
#include "zclient.h"
#include "stream.h"
#include "network.h"
#include "log.h"
#include "memory.h"
#include "nexthop.h"
#include "mpls.h"
#include "bgpd/bgpd.h"
#include "bgpd/bgp_table.h"
#include "bgpd/bgp_route.h"
#include "bgpd/bgp_attr.h"
#include "bgpd/bgp_label.h"
#include "bgpd/bgp_packet.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_errors.h"
extern struct zclient *zclient;
int bgp_parse_fec_update(void)
{
struct stream *s;
struct bgp_dest *dest;
struct bgp *bgp;
struct bgp_table *table;
struct prefix p;
uint32_t label;
afi_t afi;
safi_t safi;
s = zclient->ibuf;
memset(&p, 0, sizeof(p));
p.family = stream_getw(s);
p.prefixlen = stream_getc(s);
stream_get(p.u.val, s, PSIZE(p.prefixlen));
label = stream_getl(s);
/* hack for the bgp instance & SAFI = have to send/receive it */
afi = family2afi(p.family);
safi = SAFI_UNICAST;
bgp = bgp_get_default();
if (!bgp) {
zlog_debug("no default bgp instance");
return -1;
}
table = bgp->rib[afi][safi];
if (!table) {
zlog_debug("no %u unicast table", p.family);
return -1;
}
dest = bgp_node_lookup(table, &p);
if (!dest) {
zlog_debug("no node for the prefix");
return -1;
}
/* treat it as implicit withdraw - the label is invalid */
if (label == MPLS_INVALID_LABEL)
bgp_unset_valid_label(&dest->local_label);
else {
dest->local_label = mpls_lse_encode(label, 0, 0, 1);
bgp_set_valid_label(&dest->local_label);
}
SET_FLAG(dest->flags, BGP_NODE_LABEL_CHANGED);
bgp_process(bgp, dest, afi, safi);
bgp_dest_unlock_node(dest);
return 1;
}
mpls_label_t bgp_adv_label(struct bgp_dest *dest, struct bgp_path_info *pi,
struct peer *to, afi_t afi, safi_t safi)
{
struct peer *from;
mpls_label_t remote_label;
int reflect;
if (!dest || !pi || !to)
return MPLS_INVALID_LABEL;
remote_label = pi->extra ? pi->extra->label[0] : MPLS_INVALID_LABEL;
from = pi->peer;
reflect =
((from->sort == BGP_PEER_IBGP) && (to->sort == BGP_PEER_IBGP));
if (reflect
&& !CHECK_FLAG(to->af_flags[afi][safi],
PEER_FLAG_FORCE_NEXTHOP_SELF))
return remote_label;
if (CHECK_FLAG(to->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED))
return remote_label;
return dest->local_label;
}
static void bgp_send_fec_register_label_msg(struct bgp_dest *dest, bool reg,
uint32_t label_index)
{
struct stream *s;
int command;
const struct prefix *p;
uint16_t flags = 0;
size_t flags_pos = 0;
mpls_label_t *local_label = &(dest->local_label);
uint32_t ttl = 0;
uint32_t bos = 0;
uint32_t exp = 0;
mpls_label_t label = MPLS_INVALID_LABEL;
bool have_label_to_reg;
mpls_lse_decode(*local_label, &label, &ttl, &exp, &bos);
have_label_to_reg = bgp_is_valid_label(local_label) &&
label != MPLS_LABEL_IMPLICIT_NULL;
p = bgp_dest_get_prefix(dest);
/* Check socket. */
if (!zclient || zclient->sock < 0)
return;
if (BGP_DEBUG(labelpool, LABELPOOL))
zlog_debug("%s: FEC %sregister %pRN label_index=%u label=%u",
__func__, reg ? "" : "un", bgp_dest_to_rnode(dest),
label_index, label);
/* If the route node has a local_label assigned or the
* path node has an MPLS SR label index allowing zebra to
* derive the label, proceed with registration. */
s = zclient->obuf;
stream_reset(s);
command = (reg) ? ZEBRA_FEC_REGISTER : ZEBRA_FEC_UNREGISTER;
zclient_create_header(s, command, VRF_DEFAULT);
flags_pos = stream_get_endp(s); /* save position of 'flags' */
stream_putw(s, flags); /* initial flags */
stream_putw(s, PREFIX_FAMILY(p));
stream_put_prefix(s, p);
if (reg) {
/* label index takes precedence over auto-assigned label. */
if (label_index != 0) {
flags |= ZEBRA_FEC_REGISTER_LABEL_INDEX;
stream_putl(s, label_index);
} else if (have_label_to_reg) {
flags |= ZEBRA_FEC_REGISTER_LABEL;
stream_putl(s, label);
}
SET_FLAG(dest->flags, BGP_NODE_REGISTERED_FOR_LABEL);
} else
UNSET_FLAG(dest->flags, BGP_NODE_REGISTERED_FOR_LABEL);
/* Set length and flags */
stream_putw_at(s, 0, stream_get_endp(s));
/*
* We only need to write new flags if this is a register
*/
if (reg)
stream_putw_at(s, flags_pos, flags);
zclient_send_message(zclient);
}
/**
* This is passed as the callback function to bgp_labelpool.c:bgp_lp_get()
* by bgp_reg_dereg_for_label() when a label needs to be obtained from
* label pool.
* Note that it will reject the allocated label if a label index is found,
* because the label index supposes predictable labels
*/
int bgp_reg_for_label_callback(mpls_label_t new_label, void *labelid,
bool allocated)
{
struct bgp_dest *dest;
dest = labelid;
/*
* if the route had been removed or the request has gone then reject
* the allocated label. The requesting code will have done what is
* required to allocate the correct label
*/
if (!CHECK_FLAG(dest->flags, BGP_NODE_LABEL_REQUESTED)) {
bgp_dest_unlock_node(dest);
return -1;
}
bgp_dest_unlock_node(dest);
if (BGP_DEBUG(labelpool, LABELPOOL))
zlog_debug("%s: FEC %pRN label=%u, allocated=%d", __func__,
bgp_dest_to_rnode(dest), new_label, allocated);
if (!allocated) {
/*
* previously-allocated label is now invalid, set to implicit
* null until new label arrives
*/
if (CHECK_FLAG(dest->flags, BGP_NODE_REGISTERED_FOR_LABEL)) {
UNSET_FLAG(dest->flags, BGP_NODE_LABEL_REQUESTED);
dest->local_label = mpls_lse_encode(
MPLS_LABEL_IMPLICIT_NULL, 0, 0, 1);
bgp_set_valid_label(&dest->local_label);
}
}
dest->local_label = mpls_lse_encode(new_label, 0, 0, 1);
bgp_set_valid_label(&dest->local_label);
/*
* Get back to registering the FEC
*/
bgp_send_fec_register_label_msg(dest, true, 0);
return 0;
}
void bgp_reg_dereg_for_label(struct bgp_dest *dest, struct bgp_path_info *pi,
bool reg)
{
bool with_label_index = false;
const struct prefix *p;
bool have_label_to_reg;
uint32_t ttl = 0;
uint32_t bos = 0;
uint32_t exp = 0;
mpls_label_t label = MPLS_INVALID_LABEL;
mpls_lse_decode(dest->local_label, &label, &ttl, &exp, &bos);
have_label_to_reg = bgp_is_valid_label(&dest->local_label) &&
label != MPLS_LABEL_IMPLICIT_NULL;
p = bgp_dest_get_prefix(dest);
if (BGP_DEBUG(labelpool, LABELPOOL))
zlog_debug("%s: %pFX: %s ", __func__, p,
(reg ? "reg" : "dereg"));
if (reg) {
assert(pi);
/*
* Determine if we will let zebra should derive label from
* label index instead of bgpd requesting from label pool
*/
if (CHECK_FLAG(pi->attr->flag,
ATTR_FLAG_BIT(BGP_ATTR_PREFIX_SID))
&& pi->attr->label_index != BGP_INVALID_LABEL_INDEX) {
with_label_index = true;
UNSET_FLAG(dest->flags, BGP_NODE_LABEL_REQUESTED);
} else {
/*
* If no label has been registered -- assume any label
* from label pool will do. This means that label index
* always takes precedence over auto-assigned labels.
*/
if (!have_label_to_reg) {
SET_FLAG(dest->flags, BGP_NODE_LABEL_REQUESTED);
if (BGP_DEBUG(labelpool, LABELPOOL))
zlog_debug(
"%s: Requesting label from LP for %pFX",
__func__, p);
/* bgp_reg_for_label_callback() will deal with
* fec registration when it gets a label from
* the pool. This means we'll never register
* FECs withoutvalid labels.
*/
bgp_lp_get(LP_TYPE_BGP_LU, dest,
bgp_reg_for_label_callback);
return;
}
}
} else {
UNSET_FLAG(dest->flags, BGP_NODE_LABEL_REQUESTED);
bgp_lp_release(LP_TYPE_BGP_LU, dest, label);
}
bgp_send_fec_register_label_msg(
dest, reg, with_label_index ? pi->attr->label_index : 0);
}
static int bgp_nlri_get_labels(struct peer *peer, uint8_t *pnt, uint8_t plen,
mpls_label_t *label)
{
uint8_t *data = pnt;
uint8_t *lim = pnt + plen;
uint8_t llen = 0;
uint8_t label_depth = 0;
if (plen < BGP_LABEL_BYTES)
return 0;
for (; data < lim; data += BGP_LABEL_BYTES) {
memcpy(label, data, BGP_LABEL_BYTES);
llen += BGP_LABEL_BYTES;
bgp_set_valid_label(label);
label_depth += 1;
if (bgp_is_withdraw_label(label) || label_bos(label))
break;
}
/* If we RX multiple labels we will end up keeping only the last
* one. We do not yet support a label stack greater than 1. */
if (label_depth > 1)
zlog_info("%pBP rcvd UPDATE with label stack %d deep", peer,
label_depth);
if (!(bgp_is_withdraw_label(label) || label_bos(label)))
flog_warn(
EC_BGP_INVALID_LABEL_STACK,
"%pBP rcvd UPDATE with invalid label stack - no bottom of stack",
peer);
return llen;
}
int bgp_nlri_parse_label(struct peer *peer, struct attr *attr,
struct bgp_nlri *packet)
{
uint8_t *pnt;
uint8_t *lim;
struct prefix p;
int psize = 0;
int prefixlen;
afi_t afi;
safi_t safi;
bool addpath_capable;
uint32_t addpath_id;
mpls_label_t label = MPLS_INVALID_LABEL;
uint8_t llen;
pnt = packet->nlri;
lim = pnt + packet->length;
afi = packet->afi;
safi = packet->safi;
addpath_id = 0;
addpath_capable = bgp_addpath_encode_rx(peer, afi, safi);
for (; pnt < lim; pnt += psize) {
/* Clear prefix structure. */
memset(&p, 0, sizeof(p));
if (addpath_capable) {
/* When packet overflow occurs return immediately. */
if (pnt + BGP_ADDPATH_ID_LEN > lim)
return BGP_NLRI_PARSE_ERROR_PACKET_OVERFLOW;
memcpy(&addpath_id, pnt, BGP_ADDPATH_ID_LEN);
addpath_id = ntohl(addpath_id);
pnt += BGP_ADDPATH_ID_LEN;
if (pnt >= lim)
return BGP_NLRI_PARSE_ERROR_PACKET_OVERFLOW;
}
/* Fetch prefix length. */
prefixlen = *pnt++;
p.family = afi2family(packet->afi);
psize = PSIZE(prefixlen);
/* sanity check against packet data */
if ((pnt + psize) > lim) {
flog_err(
EC_BGP_UPDATE_RCV,
"%s [Error] Update packet error / L-U (prefix length %d exceeds packet size %u)",
peer->host, prefixlen, (uint)(lim - pnt));
return BGP_NLRI_PARSE_ERROR_PACKET_OVERFLOW;
}
/* Fill in the labels */
llen = bgp_nlri_get_labels(peer, pnt, psize, &label);
if (llen == 0) {
flog_err(
EC_BGP_UPDATE_RCV,
"%s [Error] Update packet error (wrong label length 0)",
peer->host);
return BGP_NLRI_PARSE_ERROR_LABEL_LENGTH;
}
p.prefixlen = prefixlen - BSIZE(llen);
/* There needs to be at least one label */
if (prefixlen < 24) {
flog_err(EC_BGP_UPDATE_RCV,
"%s [Error] Update packet error (wrong label length %d)",
peer->host, prefixlen);
return BGP_NLRI_PARSE_ERROR_LABEL_LENGTH;
}
if ((afi == AFI_IP && p.prefixlen > IPV4_MAX_BITLEN)
|| (afi == AFI_IP6 && p.prefixlen > IPV6_MAX_BITLEN))
return BGP_NLRI_PARSE_ERROR_PREFIX_LENGTH;
/* Fetch prefix from NLRI packet */
memcpy(&p.u.prefix, pnt + llen, psize - llen);
/* Check address. */
if (afi == AFI_IP && safi == SAFI_LABELED_UNICAST) {
if (IN_CLASSD(ntohl(p.u.prefix4.s_addr))) {
/* From RFC4271 Section 6.3:
*
* If a prefix in the NLRI field is semantically
* incorrect
* (e.g., an unexpected multicast IP address),
* an error SHOULD
* be logged locally, and the prefix SHOULD be
* ignored.
*/
flog_err(
EC_BGP_UPDATE_RCV,
"%s: IPv4 labeled-unicast NLRI is multicast address %pI4, ignoring",
peer->host, &p.u.prefix4);
continue;
}
}
/* Check address. */
if (afi == AFI_IP6 && safi == SAFI_LABELED_UNICAST) {
if (IN6_IS_ADDR_LINKLOCAL(&p.u.prefix6)) {
flog_err(
EC_BGP_UPDATE_RCV,
"%s: IPv6 labeled-unicast NLRI is link-local address %pI6, ignoring",
peer->host, &p.u.prefix6);
continue;
}
if (IN6_IS_ADDR_MULTICAST(&p.u.prefix6)) {
flog_err(
EC_BGP_UPDATE_RCV,
"%s: IPv6 unicast NLRI is multicast address %pI6, ignoring",
peer->host, &p.u.prefix6);
continue;
}
}
if (attr) {
bgp_update(peer, &p, addpath_id, attr, packet->afi,
safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
NULL, &label, 1, 0, NULL);
} else {
bgp_withdraw(peer, &p, addpath_id, packet->afi,
SAFI_UNICAST, ZEBRA_ROUTE_BGP,
BGP_ROUTE_NORMAL, NULL, &label, 1, NULL);
}
}
/* Packet length consistency check. */
if (pnt != lim) {
flog_err(
EC_BGP_UPDATE_RCV,
"%s [Error] Update packet error / L-U (%td data remaining after parsing)",
peer->host, lim - pnt);
return BGP_NLRI_PARSE_ERROR_PACKET_LENGTH;
}
return BGP_NLRI_PARSE_OK;
}