src/usr.sbin/bgpd/kroute.c

3295 lines
74 KiB
C

/* $OpenBSD: kroute.c,v 1.309 2024/01/09 13:41:32 claudio Exp $ */
/*
* Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
* Copyright (c) 2022 Claudio Jeker <claudio@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netmpls/mpls.h>
#include <errno.h>
#include <ifaddrs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <imsg.h>
#include "bgpd.h"
#include "log.h"
/*
 * Placeholder priority stored in routes owned by bgpd (see kr_change() and
 * kr_delete()); kr_priority() translates it to kr_state.fib_prio.
 */
#define RTP_MINE 0xff
/* kernel table index, addressed by rtableid; unused slots are NULL */
struct ktable **krt;
/* number of slots currently allocated in krt */
u_int krt_size;
/* routing socket state, filled in by kr_init() */
struct {
uint32_t rtseq; /* set to 1 by kr_init(); presumably the rtmsg sequence number */
pid_t pid; /* getpid() recorded at init time */
int fd; /* AF_ROUTE socket */
uint8_t fib_prio; /* priority reported in place of RTP_MINE */
} kr_state;
/*
 * IPv4 kernel route as kept in a table's kroute_tree.  The tree key is
 * prefix, prefixlen and priority (see kroute_compare()).
 */
struct kroute {
RB_ENTRY(kroute) entry;
struct kroute *next; /* further routes chained behind this node; walked by kr_show_route() */
struct in_addr prefix;
struct in_addr nexthop;
uint32_t mplslabel; /* stored as produced by htonl() in krVPN4_change() */
uint16_t flags; /* F_* flags such as F_BGPD, F_BGPD_INSERTED, F_BLACKHOLE */
uint16_t labelid; /* route label id obtained from rtlabel_name2id() */
u_short ifindex;
uint8_t prefixlen;
uint8_t priority; /* RTP_MINE for routes owned by bgpd */
};
/*
 * IPv6 kernel route as kept in a table's kroute6_tree.  The tree key is
 * prefix, prefix_scope_id, prefixlen and priority (see kroute6_compare()).
 */
struct kroute6 {
RB_ENTRY(kroute6) entry;
struct kroute6 *next; /* further routes chained behind this node; walked by kr_show_route() */
struct in6_addr prefix;
struct in6_addr nexthop;
uint32_t prefix_scope_id; /* scope id of the prefix; part of the tree key */
uint32_t nexthop_scope_id;
uint32_t mplslabel; /* stored as produced by htonl() in krVPN6_change() */
uint16_t flags; /* F_* flags such as F_BGPD, F_BGPD_INSERTED, F_BLACKHOLE */
uint16_t labelid; /* route label id obtained from rtlabel_name2id() */
u_short ifindex;
uint8_t prefixlen;
uint8_t priority; /* RTP_MINE for routes owned by bgpd */
};
/* A nexthop address registered for tracking, kept in a table's knexthop_tree. */
struct knexthop {
RB_ENTRY(knexthop) entry;
struct bgpd_addr nexthop;
/*
 * Route currently associated with the nexthop, or NULL.  Interpreted as
 * struct kroute * or struct kroute6 * depending on nexthop.aid
 * (see kr_show_route()).
 */
void *kroute;
u_short ifindex;
};
/* One redistributed network, kept in a table's kredist tree. */
struct kredist_node {
RB_ENTRY(kredist_node) entry;
struct bgpd_addr prefix;
uint64_t rd;
uint8_t prefixlen;
/*
 * 1 when added through kr_net_match(), 0 when added for a
 * NETWORK_DEFAULT entry in kr_reload(); a non-dynamic entry takes
 * precedence (see kr_net_redist_add()).
 */
uint8_t dynamic;
};
/* Interface state kept in the global kif_tree (kit). */
struct kif {
RB_ENTRY(kif) entry;
char ifname[IFNAMSIZ]; /* matched by name in kr_ifinfo() */
uint64_t baudrate;
u_int rdomain;
int flags; /* interface flags; IFF_UP is tested in kr_show_interface() */
u_short ifindex;
uint8_t if_type; /* passed to get_linkstate() and ift2ifm() */
uint8_t link_state;
uint8_t nh_reachable; /* for nexthop verification */
uint8_t depend_state; /* for session depend on */
};
/* Forward declarations so the definitions below can appear in any order. */
int ktable_new(u_int, u_int, char *, int);
void ktable_free(u_int);
void ktable_destroy(struct ktable *);
struct ktable *ktable_get(u_int);
int kr4_change(struct ktable *, struct kroute_full *);
int kr6_change(struct ktable *, struct kroute_full *);
int krVPN4_change(struct ktable *, struct kroute_full *);
int krVPN6_change(struct ktable *, struct kroute_full *);
int kr_net_match(struct ktable *, struct network_config *, uint16_t, int);
struct network *kr_net_find(struct ktable *, struct network *);
void kr_net_clear(struct ktable *);
void kr_redistribute(int, struct ktable *, struct kroute_full *);
uint8_t kr_priority(struct kroute_full *);
struct kroute_full *kr_tofull(struct kroute *);
struct kroute_full *kr6_tofull(struct kroute6 *);
/* RB-tree comparators */
int kroute_compare(struct kroute *, struct kroute *);
int kroute6_compare(struct kroute6 *, struct kroute6 *);
int knexthop_compare(struct knexthop *, struct knexthop *);
int kredist_compare(struct kredist_node *, struct kredist_node *);
int kif_compare(struct kif *, struct kif *);
struct kroute *kroute_find(struct ktable *, const struct bgpd_addr *,
uint8_t, uint8_t);
struct kroute *kroute_matchgw(struct kroute *, struct kroute_full *);
int kroute_insert(struct ktable *, struct kroute_full *);
int kroute_remove(struct ktable *, struct kroute_full *, int);
void kroute_clear(struct ktable *);
struct kroute6 *kroute6_find(struct ktable *, const struct bgpd_addr *,
uint8_t, uint8_t);
struct kroute6 *kroute6_matchgw(struct kroute6 *, struct kroute_full *);
void kroute6_clear(struct ktable *);
struct knexthop *knexthop_find(struct ktable *, struct bgpd_addr *);
int knexthop_insert(struct ktable *, struct knexthop *);
void knexthop_remove(struct ktable *, struct knexthop *);
void knexthop_clear(struct ktable *);
struct kif *kif_find(int);
int kif_insert(struct kif *);
int kif_remove(struct kif *);
void kif_clear(void);
int kroute_validate(struct kroute *);
int kroute6_validate(struct kroute6 *);
int knexthop_true_nexthop(struct ktable *, struct kroute_full *);
void knexthop_validate(struct ktable *, struct knexthop *);
void knexthop_track(struct ktable *, u_short);
void knexthop_update(struct ktable *, struct kroute_full *);
void knexthop_send_update(struct knexthop *);
struct kroute *kroute_match(struct ktable *, struct bgpd_addr *, int);
struct kroute6 *kroute6_match(struct ktable *, struct bgpd_addr *, int);
void kroute_detach_nexthop(struct ktable *, struct knexthop *);
uint8_t prefixlen_classful(in_addr_t);
uint64_t ift2ifm(uint8_t);
const char *get_media_descr(uint64_t);
const char *get_linkstate(uint8_t, int);
void get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
void if_change(u_short, int, struct if_data *);
void if_announce(void *);
int send_rtmsg(int, struct ktable *, struct kroute_full *);
int dispatch_rtmsg(void);
int fetchtable(struct ktable *);
int fetchifs(int);
int dispatch_rtmsg_addr(struct rt_msghdr *, struct kroute_full *);
int kr_fib_delete(struct ktable *, struct kroute_full *, int);
int kr_fib_change(struct ktable *, struct kroute_full *, int, int);
/* Generate the red-black tree code for each of the tree types used below. */
RB_PROTOTYPE(kroute_tree, kroute, entry, kroute_compare)
RB_GENERATE(kroute_tree, kroute, entry, kroute_compare)
RB_PROTOTYPE(kroute6_tree, kroute6, entry, kroute6_compare)
RB_GENERATE(kroute6_tree, kroute6, entry, kroute6_compare)
RB_PROTOTYPE(knexthop_tree, knexthop, entry, knexthop_compare)
RB_GENERATE(knexthop_tree, knexthop, entry, knexthop_compare)
RB_PROTOTYPE(kredist_tree, kredist_node, entry, kredist_compare)
RB_GENERATE(kredist_tree, kredist_node, entry, kredist_compare)
/* global interface tree, initialised in kr_init() */
RB_HEAD(kif_tree, kif) kit;
RB_PROTOTYPE(kif_tree, kif, entry, kif_compare)
RB_GENERATE(kif_tree, kif, entry, kif_compare)
/*
 * Resolve the nexthop tree that table x uses: the knt of the table named by
 * x->nhtableid.  The result of ktable_get() is dereferenced unchecked, so
 * that table must exist.
 */
#define KT2KNT(x) (&(ktable_get((x)->nhtableid)->knt))
/*
* exported functions
*/
/*
 * Open and configure the routing socket and load the interface list.
 *
 * fd receives the routing socket descriptor on success; fib_prio is the
 * priority later reported for routes owned by bgpd.
 * Returns 0 on success and -1 on failure.  On failure after the socket was
 * created the descriptor is closed again so it is not leaked.
 */
int
kr_init(int *fd, uint8_t fib_prio)
{
	int		 opt = 0, rcvbuf, default_rcvbuf;
	unsigned int	 tid = RTABLE_ANY;
	socklen_t	 optlen;

	if ((kr_state.fd = socket(AF_ROUTE,
	    SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)) == -1) {
		log_warn("%s: socket", __func__);
		return (-1);
	}

	/* not interested in my own messages */
	if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK,
	    &opt, sizeof(opt)) == -1)
		log_warn("%s: setsockopt", __func__);	/* not fatal */

	/* grow receive buffer, don't wanna miss messages */
	optlen = sizeof(default_rcvbuf);
	if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
	    &default_rcvbuf, &optlen) == -1)
		log_warn("%s: getsockopt SOL_SOCKET SO_RCVBUF", __func__);
	else
		/* halve the request until it is accepted or not worth it */
		for (rcvbuf = MAX_RTSOCK_BUF;
		    rcvbuf > default_rcvbuf &&
		    setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
		    &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS;
		    rcvbuf /= 2)
			;	/* nothing */

	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_TABLEFILTER, &tid,
	    sizeof(tid)) == -1) {
		log_warn("%s: setsockopt AF_ROUTE ROUTE_TABLEFILTER", __func__);
		goto fail;
	}

	kr_state.pid = getpid();
	kr_state.rtseq = 1;
	kr_state.fib_prio = fib_prio;

	RB_INIT(&kit);

	if (fetchifs(0) == -1)
		goto fail;

	*fd = kr_state.fd;
	return (0);

fail:
	/* do not leak the routing socket on the error paths */
	close(kr_state.fd);
	kr_state.fd = -1;
	return (-1);
}
/* Return the default FIB priority, RTP_BGP. */
int
kr_default_prio(void)
{
	const int prio = RTP_BGP;

	return (prio);
}
/*
 * Check a candidate priority.  Returns 1 when prio is greater than
 * RTP_LOCAL and not greater than RTP_MAX, 0 otherwise.
 */
int
kr_check_prio(long long prio)
{
	return (prio > RTP_LOCAL && prio <= RTP_MAX);
}
/*
 * Create the kernel table for rtableid and load its routes.
 *
 * rdomid names the table used for nexthop lookups, name is copied into the
 * table description and fs is the initial FIB sync setting.  The krt index
 * is grown when rtableid does not fit.  Calling this for an existing table
 * is fatal.  Returns 0 on success, -1 on allocation or fetch failure.
 */
int
ktable_new(u_int rtableid, u_int rdomid, char *name, int fs)
{
	struct ktable	**xkrt;
	struct ktable	 *kt;
	size_t		  oldsize;

	/* resize index table if needed */
	if (rtableid >= krt_size) {
		oldsize = sizeof(struct ktable *) * krt_size;
		if ((xkrt = reallocarray(krt, rtableid + 1,
		    sizeof(struct ktable *))) == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		krt = xkrt;
		krt_size = rtableid + 1;
		/* only the newly added slots need clearing */
		memset((char *)krt + oldsize, 0,
		    krt_size * sizeof(struct ktable *) - oldsize);
	}

	if (krt[rtableid])
		fatalx("ktable_new: table already exists.");

	/* allocate new element */
	kt = krt[rtableid] = calloc(1, sizeof(struct ktable));
	if (kt == NULL) {
		log_warn("%s", __func__);
		return (-1);
	}

	/* initialize structure ... */
	strlcpy(kt->descr, name, sizeof(kt->descr));
	RB_INIT(&kt->krt);
	RB_INIT(&kt->krt6);
	RB_INIT(&kt->knt);
	TAILQ_INIT(&kt->krn);
	kt->fib_conf = kt->fib_sync = fs;
	kt->rtableid = rtableid;
	kt->nhtableid = rdomid;
	/* bump refcount of rdomain table for the nexthop lookups */
	ktable_get(kt->nhtableid)->nhrefcnt++;

	/* ... and load it */
	if (fetchtable(kt) == -1)
		return (-1);

	/* everything is up and running */
	kt->state = RECONF_REINIT;
	/* both ids are u_int, so print them unsigned */
	log_debug("%s: %s with rtableid %u rdomain %u", __func__, name,
	    rtableid, rdomid);
	return (0);
}
/*
 * Release the table for rtableid, if it exists.  The table is decoupled
 * from the kernel, the reference it holds on its nexthop table is dropped,
 * and whichever of the two tables is no longer referenced is destroyed.
 */
void
ktable_free(u_int rtableid)
{
	struct ktable	*self, *nhtab;

	self = ktable_get(rtableid);
	if (self == NULL)
		return;

	/* decouple from kernel, no new routes will be entered from here */
	kr_fib_decouple(self->rtableid);

	/* first unhook from the nexthop table */
	nhtab = ktable_get(self->nhtableid);
	nhtab->nhrefcnt--;

	/*
	 * If self is still referenced as a nexthop table then self == nhtab
	 * and it stays.  Otherwise self goes away, and so does nhtab once
	 * nothing references it; the self != nhtab test avoids destroying
	 * the same table twice.
	 */
	if (self != nhtab && nhtab->nhrefcnt <= 0)
		ktable_destroy(nhtab);
	if (self->nhrefcnt <= 0)
		ktable_destroy(self);
}
void
ktable_destroy(struct ktable *kt)
{
/* decouple just to be sure, does not hurt */
kr_fib_decouple(kt->rtableid);
log_debug("%s: freeing ktable %s rtableid %u", __func__, kt->descr,
kt->rtableid);
/* only clear nexthop table if it is the main rdomain table */
if (kt->rtableid == kt->nhtableid)
knexthop_clear(kt);
kroute_clear(kt);
kroute6_clear(kt);
kr_net_clear(kt);
krt[kt->rtableid] = NULL;
free(kt);
}
/*
 * Look up the table for rtableid.  Returns NULL when the id lies outside
 * the krt index or the slot is empty.
 */
struct ktable *
ktable_get(u_int rtableid)
{
	return (rtableid < krt_size ? krt[rtableid] : NULL);
}
/*
 * Make sure the tables needed for rtableid exist and carry the requested
 * configuration.
 *
 * The kernel must know rtableid (fatal otherwise).  When the routing domain
 * differs from the table, or F_RIB_NOFIB is set, the rdomain table is
 * created or revived for nexthop lookups.  Unless F_RIB_NOFIB or
 * F_RIB_NOEVALUATE is set, the table itself is created or updated with the
 * given name and FIB sync preference.
 * Returns 0 on success, -1 when a table could not be created.
 */
int
ktable_update(u_int rtableid, char *name, int flags)
{
	struct ktable	*kt, *rkt;
	u_int		 rdomid;

	if (!ktable_exists(rtableid, &rdomid))
		fatalx("King Bula lost a table");	/* may not happen */

	if (rdomid != rtableid || flags & F_RIB_NOFIB) {
		rkt = ktable_get(rdomid);
		if (rkt == NULL) {
			char buf[32];

			/* rdomid is unsigned, format it as such */
			snprintf(buf, sizeof(buf), "rdomain_%u", rdomid);
			if (ktable_new(rdomid, rdomid, buf, 0))
				return (-1);
		} else {
			/*
			 * there is no need for full fib synchronisation if
			 * the table is only used for nexthop lookups.
			 */
			if (rkt->state == RECONF_DELETE) {
				rkt->fib_conf = 0;
				rkt->state = RECONF_KEEP;
			}
		}
	}

	if (flags & (F_RIB_NOFIB | F_RIB_NOEVALUATE))
		/* only rdomain table must exist */
		return (0);

	kt = ktable_get(rtableid);
	if (kt == NULL) {
		if (ktable_new(rtableid, rdomid, name,
		    !(flags & F_RIB_NOFIBSYNC)))
			return (-1);
	} else {
		/* fib sync has higher preference than no sync */
		if (kt->state == RECONF_DELETE) {
			kt->fib_conf = !(flags & F_RIB_NOFIBSYNC);
			kt->state = RECONF_KEEP;
		} else if (!kt->fib_conf)
			kt->fib_conf = !(flags & F_RIB_NOFIBSYNC);

		strlcpy(kt->descr, name, sizeof(kt->descr));
	}
	return (0);
}
/*
 * Ask the kernel whether routing table rtableid exists.
 *
 * Returns 1 and, when rdomid is not NULL, stores the table's routing
 * domain id; returns 0 when the sysctl fails.  Failures other than ENOENT
 * are logged but still reported as "table does not exist".
 */
int
ktable_exists(u_int rtableid, u_int *rdomid)
{
	struct rt_tableinfo	 info;
	size_t			 len = sizeof(info);
	int			 mib[6];

	mib[0] = CTL_NET;
	mib[1] = PF_ROUTE;
	mib[2] = 0;
	mib[3] = 0;
	mib[4] = NET_RT_TABLE;
	mib[5] = rtableid;

	if (sysctl(mib, 6, &info, &len, NULL, 0) == -1) {
		/* ENOENT simply means the table is not there; stay quiet */
		if (errno != ENOENT)
			log_warn("sysctl net.route.rtableid");
		/* must return 0 so that the table is considered non-existent */
		return (0);
	}

	if (rdomid != NULL)
		*rdomid = info.rti_domainid;
	return (1);
}
/*
 * Add or change a bgpd route in table rtableid.
 *
 * The route is tagged F_BGPD with priority RTP_MINE.  When
 * knexthop_true_nexthop() returns zero the route is removed instead;
 * otherwise the change is handed to the handler for the prefix AID.
 * Returns 0 when the table does not exist, -1 for an unhandled AID, and
 * the handler's result otherwise.
 */
int
kr_change(u_int rtableid, struct kroute_full *kf)
{
	struct ktable	*tab = ktable_get(rtableid);

	if (tab == NULL)
		/* too noisy during reloads, just ignore */
		return (0);

	kf->priority = RTP_MINE;
	kf->flags |= F_BGPD;

	if (!knexthop_true_nexthop(tab, kf))
		return kroute_remove(tab, kf, 1);

	switch (kf->prefix.aid) {
	case AID_INET:
		return (kr4_change(tab, kf));
	case AID_INET6:
		return (kr6_change(tab, kf));
	case AID_VPN_IPv4:
		return (krVPN4_change(tab, kf));
	case AID_VPN_IPv6:
		return (krVPN6_change(tab, kf));
	default:
		break;
	}

	log_warnx("%s: not handled AID", __func__);
	return (-1);
}
/*
 * Insert or update an IPv4 route.
 *
 * Blackhole and reject routes get 127.0.0.1 as nexthop; any other nexthop
 * inside 127/8 makes the call a silent no-op.  An existing route has its
 * nexthop, label and blackhole/reject flags refreshed and an RTM_CHANGE
 * sent.  Returns 0, or -1 when inserting a new route fails.
 */
int
kr4_change(struct ktable *kt, struct kroute_full *kf)
{
	struct kroute	*route;

	if (kf->flags & (F_BLACKHOLE|F_REJECT)) {
		/* for blackhole and reject routes nexthop needs to be 127.0.0.1 */
		kf->nexthop.v4.s_addr = htonl(INADDR_LOOPBACK);
	} else if ((kf->nexthop.v4.s_addr & htonl(IN_CLASSA_NET)) ==
	    htonl(INADDR_LOOPBACK & IN_CLASSA_NET)) {
		/* nexthop within 127/8 -> ignore silently */
		return (0);
	}

	route = kroute_find(kt, &kf->prefix, kf->prefixlen, kf->priority);
	if (route == NULL) {
		if (kroute_insert(kt, kf) == -1)
			return (-1);
		return (0);
	}

	route->nexthop.s_addr = kf->nexthop.v4.s_addr;

	/* swap the label reference */
	rtlabel_unref(route->labelid);
	route->labelid = rtlabel_name2id(kf->label);

	/* mirror the blackhole/reject state of the request */
	if (kf->flags & F_BLACKHOLE)
		route->flags |= F_BLACKHOLE;
	else
		route->flags &= ~F_BLACKHOLE;
	if (kf->flags & F_REJECT)
		route->flags |= F_REJECT;
	else
		route->flags &= ~F_REJECT;

	if (send_rtmsg(RTM_CHANGE, kt, kf))
		route->flags |= F_BGPD_INSERTED;

	return (0);
}
/*
 * Insert or update an IPv6 route.
 *
 * Blackhole and reject routes get ::1 as nexthop; any other loopback
 * nexthop makes the call a silent no-op.  An existing route has its
 * nexthop, scope id, label and blackhole/reject flags refreshed and an
 * RTM_CHANGE sent.  Returns 0, or -1 when inserting a new route fails.
 */
int
kr6_change(struct ktable *kt, struct kroute_full *kf)
{
	struct kroute6	*route;
	struct in6_addr	 loopback = IN6ADDR_LOOPBACK_INIT;

	if (kf->flags & (F_BLACKHOLE|F_REJECT)) {
		/* for blackhole and reject routes nexthop needs to be ::1 */
		memcpy(&kf->nexthop.v6, &loopback, sizeof(kf->nexthop.v6));
	} else if (IN6_IS_ADDR_LOOPBACK(&kf->nexthop.v6)) {
		/* nexthop to loopback -> ignore silently */
		return (0);
	}

	route = kroute6_find(kt, &kf->prefix, kf->prefixlen, kf->priority);
	if (route == NULL) {
		if (kroute_insert(kt, kf) == -1)
			return (-1);
		return (0);
	}

	memcpy(&route->nexthop, &kf->nexthop.v6, sizeof(struct in6_addr));
	route->nexthop_scope_id = kf->nexthop.scope_id;

	/* swap the label reference */
	rtlabel_unref(route->labelid);
	route->labelid = rtlabel_name2id(kf->label);

	/* mirror the blackhole/reject state of the request */
	if (kf->flags & F_BLACKHOLE)
		route->flags |= F_BLACKHOLE;
	else
		route->flags &= ~F_BLACKHOLE;
	if (kf->flags & F_REJECT)
		route->flags |= F_REJECT;
	else
		route->flags &= ~F_REJECT;

	if (send_rtmsg(RTM_CHANGE, kt, kf))
		route->flags |= F_BGPD_INSERTED;

	return (0);
}
int
krVPN4_change(struct ktable *kt, struct kroute_full *kf)
{
struct kroute *kr;
uint32_t mplslabel = 0;
/* nexthop within 127/8 -> ignore silently */
if ((kf->nexthop.v4.s_addr & htonl(IN_CLASSA_NET)) ==
htonl(INADDR_LOOPBACK & IN_CLASSA_NET))
return (0);
/* only a single MPLS label is supported for now */
if (kf->prefix.labellen != 3) {
log_warnx("%s: %s/%u has not a single label", __func__,
log_addr(&kf->prefix), kf->prefixlen);
return (0);
}
mplslabel = (kf->prefix.labelstack[0] << 24) |
(kf->prefix.labelstack[1] << 16) |
(kf->prefix.labelstack[2] << 8);
mplslabel = htonl(mplslabel);
kf->flags |= F_MPLS;
kf->mplslabel = mplslabel;
/* for blackhole and reject routes nexthop needs to be 127.0.0.1 */
if (kf->flags & (F_BLACKHOLE|F_REJECT))
kf->nexthop.v4.s_addr = htonl(INADDR_LOOPBACK);
if ((kr = kroute_find(kt, &kf->prefix, kf->prefixlen,
kf->priority)) == NULL) {
if (kroute_insert(kt, kf) == -1)
return (-1);
} else {
kr->mplslabel = mplslabel;
kr->flags |= F_MPLS;
kr->ifindex = kf->ifindex;
kr->nexthop.s_addr = kf->nexthop.v4.s_addr;
rtlabel_unref(kr->labelid);
kr->labelid = rtlabel_name2id(kf->label);
if (kf->flags & F_BLACKHOLE)
kr->flags |= F_BLACKHOLE;
else
kr->flags &= ~F_BLACKHOLE;
if (kf->flags & F_REJECT)
kr->flags |= F_REJECT;
else
kr->flags &= ~F_REJECT;
if (send_rtmsg(RTM_CHANGE, kt, kf))
kr->flags |= F_BGPD_INSERTED;
}
return (0);
}
int
krVPN6_change(struct ktable *kt, struct kroute_full *kf)
{
struct kroute6 *kr6;
struct in6_addr lo6 = IN6ADDR_LOOPBACK_INIT;
uint32_t mplslabel = 0;
/* nexthop to loopback -> ignore silently */
if (IN6_IS_ADDR_LOOPBACK(&kf->nexthop.v6))
return (0);
/* only a single MPLS label is supported for now */
if (kf->prefix.labellen != 3) {
log_warnx("%s: %s/%u has not a single label", __func__,
log_addr(&kf->prefix), kf->prefixlen);
return (0);
}
mplslabel = (kf->prefix.labelstack[0] << 24) |
(kf->prefix.labelstack[1] << 16) |
(kf->prefix.labelstack[2] << 8);
mplslabel = htonl(mplslabel);
kf->flags |= F_MPLS;
kf->mplslabel = mplslabel;
/* for blackhole and reject routes nexthop needs to be ::1 */
if (kf->flags & (F_BLACKHOLE|F_REJECT))
memcpy(&kf->nexthop.v6, &lo6, sizeof(kf->nexthop.v6));
if ((kr6 = kroute6_find(kt, &kf->prefix, kf->prefixlen,
kf->priority)) == NULL) {
if (kroute_insert(kt, kf) == -1)
return (-1);
} else {
kr6->mplslabel = mplslabel;
kr6->flags |= F_MPLS;
kr6->ifindex = kf->ifindex;
memcpy(&kr6->nexthop, &kf->nexthop.v6, sizeof(struct in6_addr));
kr6->nexthop_scope_id = kf->nexthop.scope_id;
rtlabel_unref(kr6->labelid);
kr6->labelid = rtlabel_name2id(kf->label);
if (kf->flags & F_BLACKHOLE)
kr6->flags |= F_BLACKHOLE;
else
kr6->flags &= ~F_BLACKHOLE;
if (kf->flags & F_REJECT)
kr6->flags |= F_REJECT;
else
kr6->flags &= ~F_REJECT;
if (send_rtmsg(RTM_CHANGE, kt, kf))
kr6->flags |= F_BGPD_INSERTED;
}
return (0);
}
/*
 * Remove a bgpd route from table rtableid.  The request is tagged F_BGPD
 * with priority RTP_MINE before being handed to kroute_remove().
 * Returns 0 when the table does not exist, else kroute_remove()'s result.
 */
int
kr_delete(u_int rtableid, struct kroute_full *kf)
{
	struct ktable	*tab = ktable_get(rtableid);

	if (tab == NULL)
		/* too noisy during reloads, just ignore */
		return (0);

	kf->priority = RTP_MINE;
	kf->flags |= F_BGPD;

	return kroute_remove(tab, kf, 1);
}
/*
 * Remove every route flagged F_BGPD_INSERTED from table rtableid and mark
 * the table as no longer synced.  Returns 0 on success or when the table
 * does not exist, -1 as soon as one removal fails.
 */
int
kr_flush(u_int rtableid)
{
	struct ktable	*tab;
	struct kroute	*r4, *r4next;
	struct kroute6	*r6, *r6next;

	tab = ktable_get(rtableid);
	if (tab == NULL)
		/* too noisy during reloads, just ignore */
		return (0);

	RB_FOREACH_SAFE(r4, kroute_tree, &tab->krt, r4next) {
		if (!(r4->flags & F_BGPD_INSERTED))
			continue;
		if (kroute_remove(tab, kr_tofull(r4), 1) == -1)
			return (-1);
	}

	RB_FOREACH_SAFE(r6, kroute6_tree, &tab->krt6, r6next) {
		if (!(r6->flags & F_BGPD_INSERTED))
			continue;
		if (kroute_remove(tab, kr6_tofull(r6), 1) == -1)
			return (-1);
	}

	tab->fib_sync = 0;
	return (0);
}
/*
 * Release all kernel tables (highest rtableid first), the interface tree
 * and the table index.  The index pointer and size are reset afterwards so
 * that a later ktable_get() sees an empty index instead of freed memory.
 */
void
kr_shutdown(void)
{
	u_int	i;

	for (i = krt_size; i > 0; i--)
		ktable_free(i - 1);
	kif_clear();

	free(krt);
	krt = NULL;
	krt_size = 0;
}
/*
 * Couple table rtableid to the kernel: mark it synced and send RTM_ADD for
 * every F_BGPD route, flagging those for which send_rtmsg() returns
 * non-zero as F_BGPD_INSERTED.  Does nothing when the table is missing or
 * already coupled.
 */
void
kr_fib_couple(u_int rtableid)
{
	struct ktable	*tab;
	struct kroute	*r4;
	struct kroute6	*r6;

	tab = ktable_get(rtableid);
	if (tab == NULL)	/* table does not exist */
		return;
	if (tab->fib_sync)	/* already coupled */
		return;

	/* must be set before the messages are sent */
	tab->fib_sync = 1;

	RB_FOREACH(r4, kroute_tree, &tab->krt) {
		if ((r4->flags & F_BGPD) &&
		    send_rtmsg(RTM_ADD, tab, kr_tofull(r4)))
			r4->flags |= F_BGPD_INSERTED;
	}
	RB_FOREACH(r6, kroute6_tree, &tab->krt6) {
		if ((r6->flags & F_BGPD) &&
		    send_rtmsg(RTM_ADD, tab, kr6_tofull(r6)))
			r6->flags |= F_BGPD_INSERTED;
	}

	log_info("kernel routing table %u (%s) coupled", tab->rtableid,
	    tab->descr);
}
/* Couple every table slot to the kernel, highest rtableid first. */
void
kr_fib_couple_all(void)
{
	u_int	id = krt_size;

	while (id-- > 0)
		kr_fib_couple(id);
}
/*
 * Decouple table rtableid from the kernel: send RTM_DELETE for every
 * F_BGPD_INSERTED route, clearing the flag where send_rtmsg() returns
 * non-zero, then mark the table unsynced.  Does nothing when the table is
 * missing or already decoupled.
 */
void
kr_fib_decouple(u_int rtableid)
{
	struct ktable	*tab;
	struct kroute	*r4;
	struct kroute6	*r6;

	tab = ktable_get(rtableid);
	if (tab == NULL)	/* table does not exist */
		return;
	if (!tab->fib_sync)	/* already decoupled */
		return;

	RB_FOREACH(r4, kroute_tree, &tab->krt) {
		if ((r4->flags & F_BGPD_INSERTED) &&
		    send_rtmsg(RTM_DELETE, tab, kr_tofull(r4)))
			r4->flags &= ~F_BGPD_INSERTED;
	}
	RB_FOREACH(r6, kroute6_tree, &tab->krt6) {
		if ((r6->flags & F_BGPD_INSERTED) &&
		    send_rtmsg(RTM_DELETE, tab, kr6_tofull(r6)))
			r6->flags &= ~F_BGPD_INSERTED;
	}

	/* cleared only after the deletes went out */
	tab->fib_sync = 0;

	log_info("kernel routing table %u (%s) decoupled", tab->rtableid,
	    tab->descr);
}
/* Decouple every table slot from the kernel, highest rtableid first. */
void
kr_fib_decouple_all(void)
{
	u_int	id = krt_size;

	while (id-- > 0)
		kr_fib_decouple(id);
}
/* Record prio as the priority reported for routes owned by bgpd. */
void
kr_fib_prio_set(uint8_t prio)
{
	kr_state.fib_prio = prio;
	return;
}
/* Exported entry point that forwards to dispatch_rtmsg() and returns its result. */
int
kr_dispatch_msg(void)
{
	int	rv;

	rv = dispatch_rtmsg();
	return (rv);
}
/*
 * Register addr as a tracked nexthop in table rtableid.
 *
 * When the nexthop is already known, only an update is sent for it.
 * Returns 0 on success and when the table does not exist (a warning is
 * logged), -1 when allocation or insertion fails.
 */
int
kr_nexthop_add(u_int rtableid, struct bgpd_addr *addr)
{
	struct ktable	*kt;
	struct knexthop	*h;

	if ((kt = ktable_get(rtableid)) == NULL) {
		/* rtableid is unsigned, print it as such */
		log_warnx("%s: non-existent rtableid %u", __func__, rtableid);
		return (0);
	}
	if ((h = knexthop_find(kt, addr)) != NULL) {
		/* should not happen... this is actually an error path */
		knexthop_send_update(h);
	} else {
		if ((h = calloc(1, sizeof(*h))) == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		memcpy(&h->nexthop, addr, sizeof(h->nexthop));

		/*
		 * NOTE(review): h is not freed here on failure; presumably
		 * knexthop_insert() takes ownership -- confirm.
		 */
		if (knexthop_insert(kt, h) == -1)
			return (-1);
	}

	return (0);
}
/*
 * Stop tracking nexthop addr in table rtableid.  A missing table is logged;
 * an unknown nexthop is ignored silently.
 */
void
kr_nexthop_delete(u_int rtableid, struct bgpd_addr *addr)
{
	struct ktable	*kt;
	struct knexthop	*kn;

	if ((kt = ktable_get(rtableid)) == NULL) {
		/* rtableid is unsigned, print it as such */
		log_warnx("%s: non-existent rtableid %u", __func__,
		    rtableid);
		return;
	}
	if ((kn = knexthop_find(kt, addr)) == NULL)
		return;

	knexthop_remove(kt, kn);
}
static struct ctl_show_interface *
kr_show_interface(struct kif *kif)
{
static struct ctl_show_interface iface;
uint64_t ifms_type;
memset(&iface, 0, sizeof(iface));
strlcpy(iface.ifname, kif->ifname, sizeof(iface.ifname));
snprintf(iface.linkstate, sizeof(iface.linkstate),
"%s", get_linkstate(kif->if_type, kif->link_state));
if ((ifms_type = ift2ifm(kif->if_type)) != 0)
snprintf(iface.media, sizeof(iface.media),
"%s", get_media_descr(ifms_type));
iface.baudrate = kif->baudrate;
iface.rdomain = kif->rdomain;
iface.nh_reachable = kif->nh_reachable;
iface.is_up = (kif->flags & IFF_UP) == IFF_UP;
return &iface;
}
/*
 * Answer a control request for kernel routing information.
 *
 * The imsg id selects the table and the imsg pid is echoed in every reply.
 * Depending on the imsg type this sends the routes of a table (optionally
 * filtered by address family and flags), the route matching one address,
 * the tracked nexthops, the interface list, or the list of tables.  Every
 * request, including malformed or unknown ones, is terminated with
 * IMSG_CTL_END.
 */
void
kr_show_route(struct imsg *imsg)
{
	struct ktable		*kt;
	struct kroute		*kr, *kn;
	struct kroute6		*kr6, *kn6;
	struct kroute_full	*kf;
	struct bgpd_addr	 addr;
	struct ctl_kroute_req	 req;
	struct ctl_show_nexthop	 snh;
	struct knexthop		*h;
	struct kif		*kif;
	uint32_t		 tableid;
	pid_t			 pid;
	u_int			 i;
	u_short			 ifindex = 0;

	tableid = imsg_get_id(imsg);
	pid = imsg_get_pid(imsg);
	switch (imsg_get_type(imsg)) {
	case IMSG_CTL_KROUTE:
		if (imsg_get_data(imsg, &req, sizeof(req)) == -1) {
			log_warnx("%s: wrong imsg len", __func__);
			break;
		}
		kt = ktable_get(tableid);
		if (kt == NULL) {
			log_warnx("%s: table %u does not exist", __func__,
			    tableid);
			break;
		}
		/* af 0 means both families; flags 0 means no filtering */
		if (!req.af || req.af == AF_INET)
			RB_FOREACH(kr, kroute_tree, &kt->krt) {
				if (req.flags && (kr->flags & req.flags) == 0)
					continue;
				/* report the node and everything chained to it */
				kn = kr;
				do {
					kf = kr_tofull(kn);
					kf->priority = kr_priority(kf);
					send_imsg_session(IMSG_CTL_KROUTE,
					    pid, kf, sizeof(*kf));
				} while ((kn = kn->next) != NULL);
			}
		if (!req.af || req.af == AF_INET6)
			RB_FOREACH(kr6, kroute6_tree, &kt->krt6) {
				if (req.flags && (kr6->flags & req.flags) == 0)
					continue;
				kn6 = kr6;
				do {
					kf = kr6_tofull(kn6);
					kf->priority = kr_priority(kf);
					send_imsg_session(IMSG_CTL_KROUTE,
					    pid, kf, sizeof(*kf));
				} while ((kn6 = kn6->next) != NULL);
			}
		break;
	case IMSG_CTL_KROUTE_ADDR:
		if (imsg_get_data(imsg, &addr, sizeof(addr)) == -1) {
			log_warnx("%s: wrong imsg len", __func__);
			break;
		}
		kt = ktable_get(tableid);
		if (kt == NULL) {
			log_warnx("%s: table %u does not exist", __func__,
			    tableid);
			break;
		}
		kr = NULL;
		switch (addr.aid) {
		case AID_INET:
			kr = kroute_match(kt, &addr, 1);
			if (kr != NULL) {
				kf = kr_tofull(kr);
				kf->priority = kr_priority(kf);
				send_imsg_session(IMSG_CTL_KROUTE,
				    pid, kf, sizeof(*kf));
			}
			break;
		case AID_INET6:
			kr6 = kroute6_match(kt, &addr, 1);
			if (kr6 != NULL) {
				kf = kr6_tofull(kr6);
				kf->priority = kr_priority(kf);
				send_imsg_session(IMSG_CTL_KROUTE,
				    pid, kf, sizeof(*kf));
			}
			break;
		}
		break;
	case IMSG_CTL_SHOW_NEXTHOP:
		kt = ktable_get(tableid);
		if (kt == NULL) {
			log_warnx("%s: table %u does not exist", __func__,
			    tableid);
			break;
		}
		RB_FOREACH(h, knexthop_tree, KT2KNT(kt)) {
			memset(&snh, 0, sizeof(snh));
			memcpy(&snh.addr, &h->nexthop, sizeof(snh.addr));
			if (h->kroute != NULL) {
				/* h->kroute's real type depends on the AID */
				switch (h->nexthop.aid) {
				case AID_INET:
					kr = h->kroute;
					snh.valid = kroute_validate(kr);
					snh.krvalid = 1;
					snh.kr = *kr_tofull(kr);
					ifindex = kr->ifindex;
					break;
				case AID_INET6:
					kr6 = h->kroute;
					snh.valid = kroute6_validate(kr6);
					snh.krvalid = 1;
					snh.kr = *kr6_tofull(kr6);
					ifindex = kr6->ifindex;
					break;
				}
				snh.kr.priority = kr_priority(&snh.kr);
				if ((kif = kif_find(ifindex)) != NULL)
					memcpy(&snh.iface,
					    kr_show_interface(kif),
					    sizeof(snh.iface));
			}
			send_imsg_session(IMSG_CTL_SHOW_NEXTHOP, pid,
			    &snh, sizeof(snh));
		}
		break;
	case IMSG_CTL_SHOW_INTERFACE:
		RB_FOREACH(kif, kif_tree, &kit)
			send_imsg_session(IMSG_CTL_SHOW_INTERFACE,
			    pid, kr_show_interface(kif),
			    sizeof(struct ctl_show_interface));
		break;
	case IMSG_CTL_SHOW_FIB_TABLES:
		for (i = 0; i < krt_size; i++) {
			struct ktable	ktab;

			if ((kt = ktable_get(i)) == NULL)
				continue;

			ktab = *kt;
			/*
			 * do not leak internal information: reset every
			 * tree and list head in the copy, including the
			 * redistribution tree.
			 */
			RB_INIT(&ktab.krt);
			RB_INIT(&ktab.krt6);
			RB_INIT(&ktab.knt);
			RB_INIT(&ktab.kredist);
			TAILQ_INIT(&ktab.krn);

			send_imsg_session(IMSG_CTL_SHOW_FIB_TABLES,
			    pid, &ktab, sizeof(ktab));
		}
		break;
	default:	/* nada */
		break;
	}

	send_imsg_session(IMSG_CTL_END, pid, NULL, 0);
}
static void
kr_send_dependon(struct kif *kif)
{
struct session_dependon sdon = { 0 };
strlcpy(sdon.ifname, kif->ifname, sizeof(sdon.ifname));
sdon.depend_state = kif->depend_state;
send_imsg_session(IMSG_SESSION_DEPENDON, 0, &sdon, sizeof(sdon));
}
void
kr_ifinfo(char *ifname)
{
struct kif *kif;
RB_FOREACH(kif, kif_tree, &kit)
if (!strcmp(ifname, kif->ifname)) {
kr_send_dependon(kif);
return;
}
}
/*
 * Record net as redistributed in kt and announce it with IMSG_NETWORK_ADD.
 *
 * When the network is already recorded, a dynamic request does not
 * override a non-dynamic entry: nothing is sent and 0 is returned.  In all
 * other cases the entry's dynamic flag is set to the requested value, the
 * network is sent (a send failure is only logged) and 1 is returned.
 * Allocation failure is fatal.
 */
static int
kr_net_redist_add(struct ktable *kt, struct network_config *net,
    struct filter_set_head *attr, int dynamic)
{
	struct kredist_node	*fresh, *present;

	fresh = calloc(1, sizeof(*fresh));
	if (fresh == NULL)
		fatal("%s", __func__);
	fresh->prefix = net->prefix;
	fresh->prefixlen = net->prefixlen;
	fresh->rd = net->rd;
	fresh->dynamic = dynamic;

	present = RB_INSERT(kredist_tree, &kt->kredist, fresh);
	if (present != NULL) {
		/* already known, the new node is not needed */
		free(fresh);

		/*
		 * A non-dynamic announcement is already present and has
		 * preference, so ignore the dynamic update.
		 */
		if (dynamic != present->dynamic && dynamic)
			return 0;

		/*
		 * Only an equal or a non-dynamic announcement ends up
		 * here.  In both cases reset the dynamic flag (a no-op
		 * when equal) and redistribute.
		 */
		present->dynamic = dynamic;
	}

	if (send_network(IMSG_NETWORK_ADD, net, attr) == -1)
		log_warnx("%s: failed to send network update", __func__);
	return 1;
}
static void
kr_net_redist_del(struct ktable *kt, struct network_config *net, int dynamic)
{
struct kredist_node *r, node;
memset(&node, 0, sizeof(node));
node.prefix = net->prefix;
node.prefixlen = net->prefixlen;
node.rd = net->rd;
r = RB_FIND(kredist_tree, &kt->kredist, &node);
if (r == NULL || dynamic != r->dynamic)
return;
if (RB_REMOVE(kredist_tree, &kt->kredist, r) == NULL) {
log_warnx("%s: failed to remove network %s/%u", __func__,
log_addr(&node.prefix), node.prefixlen);
return;
}
free(r);
if (send_network(IMSG_NETWORK_REMOVE, net, NULL) == -1)
log_warnx("%s: failed to send network removal", __func__);
}
/*
 * Match net against the networks configured on kt and redistribute it on
 * the first configured entry that accepts it.
 *
 * flags are the route's flags; loopback is non-zero when the route's
 * nexthop is the loopback address, which disqualifies static and connected
 * matches.  net->rd is overwritten with the rd of each entry that is
 * tried.  Returns 1 once kr_net_redist_add() reports success, else 0.
 */
int
kr_net_match(struct ktable *kt, struct network_config *net, uint16_t flags,
    int loopback)
{
	struct network	*cand;
	int		 hit;

	TAILQ_FOREACH(cand, &kt->krn, entry) {
		if (cand->net.prefix.aid != net->prefix.aid)
			continue;

		/* types not listed below count as a match */
		hit = 1;
		switch (cand->net.type) {
		case NETWORK_DEFAULT:
			/* static match already redistributed */
			hit = 0;
			break;
		case NETWORK_STATIC:
			/* Skip networks with nexthop on loopback. */
			hit = !loopback && (flags & F_STATIC) != 0;
			break;
		case NETWORK_CONNECTED:
			/* Skip networks with nexthop on loopback. */
			hit = !loopback && (flags & F_CONNECTED) != 0;
			break;
		case NETWORK_RTLABEL:
			hit = (net->rtlabel == cand->net.rtlabel);
			break;
		case NETWORK_PRIORITY:
			hit = (net->priority == cand->net.priority);
			break;
		case NETWORK_MRTCLONE:
		case NETWORK_PREFIXSET:
			/* must not happen */
			log_warnx("%s: found a NETWORK_PREFIXSET, "
			    "please send a bug report", __func__);
			hit = 0;
			break;
		}
		if (!hit)
			continue;

		net->rd = cand->net.rd;
		if (kr_net_redist_add(kt, net, &cand->net.attrset, 1))
			return (1);
	}
	return (0);
}
/*
 * Find the network configured on kt that equals n in type, prefixlen, rd,
 * rtlabel, priority and (byte-wise) prefix.  Returns NULL when none does.
 */
struct network *
kr_net_find(struct ktable *kt, struct network *n)
{
	struct network	*cand;

	TAILQ_FOREACH(cand, &kt->krn, entry) {
		if (n->net.type == cand->net.type &&
		    n->net.prefixlen == cand->net.prefixlen &&
		    n->net.rd == cand->net.rd &&
		    n->net.rtlabel == cand->net.rtlabel &&
		    n->net.priority == cand->net.priority &&
		    memcmp(&n->net.prefix, &cand->net.prefix,
		    sizeof(n->net.prefix)) == 0)
			return (cand);
	}
	return (NULL);
}
/*
 * Merge the networks on list nh into table rtableid, emptying nh.
 *
 * Each network gets rd assigned and its old mark cleared.  If an equal
 * network already exists on the table it takes over the new attribute set
 * and the incoming entry is freed; otherwise the incoming entry is appended
 * to the table.  A non-existent table is fatal.
 */
void
kr_net_reload(u_int rtableid, uint64_t rd, struct network_head *nh)
{
	struct network	*n, *xn;
	struct ktable	*kt;

	if ((kt = ktable_get(rtableid)) == NULL)
		/* rtableid is unsigned, print it as such */
		fatalx("%s: non-existent rtableid %u", __func__, rtableid);

	while ((n = TAILQ_FIRST(nh)) != NULL) {
		TAILQ_REMOVE(nh, n, entry);
		n->net.old = 0;
		n->net.rd = rd;
		xn = kr_net_find(kt, n);
		if (xn) {
			/* keep the existing entry, replace its attributes */
			xn->net.old = 0;
			filterset_free(&xn->net.attrset);
			filterset_move(&n->net.attrset, &xn->net.attrset);
			network_free(n);
		} else
			TAILQ_INSERT_TAIL(&kt->krn, n, entry);
	}
}
void
kr_net_clear(struct ktable *kt)
{
struct network *n, *xn;
TAILQ_FOREACH_SAFE(n, &kt->krn, entry, xn) {
TAILQ_REMOVE(&kt->krn, n, entry);
if (n->net.type == NETWORK_DEFAULT)
kr_net_redist_del(kt, &n->net, 0);
network_free(n);
}
}
/*
 * Re-evaluate whether route kf should be redistributed from kt.
 *
 * For IMSG_NETWORK_REMOVE the dynamic announcement is withdrawn and nothing
 * else is done.  Otherwise routes owned by bgpd, non-redistributable
 * prefixes, unhandled AIDs and default routes are ignored; remaining routes
 * are matched against the configured networks and withdrawn when no
 * network matches any more.
 */
void
kr_redistribute(int type, struct ktable *kt, struct kroute_full *kf)
{
	struct network_config	 net;
	uint32_t		 hostaddr;
	int			 via_loopback = 0;

	memset(&net, 0, sizeof(net));
	net.prefix = kf->prefix;
	net.prefixlen = kf->prefixlen;
	net.rtlabel = rtlabel_name2id(kf->label);
	rtlabel_unref(net.rtlabel); /* drop reference now, which is ok here */
	net.priority = kf->priority;

	/* shortcut for removals */
	if (type == IMSG_NETWORK_REMOVE) {
		kr_net_redist_del(kt, &net, 1);
		return;
	}

	if (kf->flags & F_BGPD)
		return;

	if (kf->prefix.aid == AID_INET) {
		/*
		 * We consider the loopback net and multicast addresses
		 * as not redistributable.
		 */
		hostaddr = ntohl(kf->prefix.v4.s_addr);
		if (IN_MULTICAST(hostaddr) ||
		    (hostaddr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
			return;
		/* Check if the nexthop is the loopback addr. */
		if (kf->nexthop.v4.s_addr == htonl(INADDR_LOOPBACK))
			via_loopback = 1;
	} else if (kf->prefix.aid == AID_INET6) {
		/*
		 * We consider unspecified, loopback, multicast,
		 * link- and site-local, IPv4 mapped and IPv4 compatible
		 * addresses as not redistributable.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&kf->prefix.v6) ||
		    IN6_IS_ADDR_LOOPBACK(&kf->prefix.v6) ||
		    IN6_IS_ADDR_MULTICAST(&kf->prefix.v6) ||
		    IN6_IS_ADDR_LINKLOCAL(&kf->prefix.v6) ||
		    IN6_IS_ADDR_SITELOCAL(&kf->prefix.v6) ||
		    IN6_IS_ADDR_V4MAPPED(&kf->prefix.v6) ||
		    IN6_IS_ADDR_V4COMPAT(&kf->prefix.v6))
			return;
		/* Check if the nexthop is the loopback addr. */
		if (IN6_IS_ADDR_LOOPBACK(&kf->nexthop.v6))
			via_loopback = 1;
	} else {
		/* unhandled AID cannot be redistributed */
		return;
	}

	/*
	 * never allow 0/0 or ::/0 the default route can only be redistributed
	 * with announce default.
	 */
	if (kf->prefixlen == 0)
		return;

	if (kr_net_match(kt, &net, kf->flags, via_loopback) == 0) {
		/* no longer matches, if still present remove it */
		kr_net_redist_del(kt, &net, 1);
	}
}
void
ktable_preload(void)
{
struct ktable *kt;
struct network *n;
u_int i;
for (i = 0; i < krt_size; i++) {
if ((kt = ktable_get(i)) == NULL)
continue;
kt->state = RECONF_DELETE;
/* mark all networks as old */
TAILQ_FOREACH(n, &kt->krn, entry)
n->net.old = 1;
}
}
/*
 * Finish a configuration reload, walking the tables from the highest
 * rtableid down: tables still flagged RECONF_DELETE are freed, tables in
 * RECONF_REINIT take over their configured FIB sync setting, and networks
 * still marked old are removed.
 */
void
ktable_postload(void)
{
	struct ktable	*tab;
	struct network	*net, *nextnet;
	u_int		 slot;

	for (slot = krt_size; slot > 0; slot--) {
		tab = ktable_get(slot - 1);
		if (tab == NULL)
			continue;

		if (tab->state == RECONF_DELETE) {
			ktable_free(slot - 1);
			continue;
		}
		if (tab->state == RECONF_REINIT)
			tab->fib_sync = tab->fib_conf;

		/* cleanup old networks */
		TAILQ_FOREACH_SAFE(net, &tab->krn, entry, nextnet) {
			if (!net->net.old)
				continue;
			TAILQ_REMOVE(&tab->krn, net, entry);
			if (net->net.type == NETWORK_DEFAULT)
				kr_net_redist_del(tab, &net->net, 0);
			network_free(net);
		}
	}
}
int
kr_reload(void)
{
struct ktable *kt;
struct kroute *kr;
struct kroute6 *kr6;
struct knexthop *nh;
struct network *n;
u_int rid;
int hasdyn = 0;
for (rid = 0; rid < krt_size; rid++) {
if ((kt = ktable_get(rid)) == NULL)
continue;
/* if this is the main nexthop table revalidate nexthops */
if (kt->rtableid == kt->nhtableid)
RB_FOREACH(nh, knexthop_tree, KT2KNT(kt))
knexthop_validate(kt, nh);
TAILQ_FOREACH(n, &kt->krn, entry)
if (n->net.type == NETWORK_DEFAULT) {
kr_net_redist_add(kt, &n->net,
&n->net.attrset, 0);
} else
hasdyn = 1;
if (hasdyn) {
/* only evaluate the full tree if we need */
RB_FOREACH(kr, kroute_tree, &kt->krt)
kr_redistribute(IMSG_NETWORK_ADD, kt,
kr_tofull(kr));
RB_FOREACH(kr6, kroute6_tree, &kt->krt6)
kr_redistribute(IMSG_NETWORK_ADD, kt,
kr6_tofull(kr6));
}
}
return (0);
}
/*
 * Return the priority to report for kf: the configured FIB priority when
 * the route carries the RTP_MINE placeholder, its own priority otherwise.
 */
uint8_t
kr_priority(struct kroute_full *kf)
{
	return (kf->priority == RTP_MINE ? kr_state.fib_prio : kf->priority);
}
struct kroute_full *
kr_tofull(struct kroute *kr)
{
static struct kroute_full kf;
memset(&kf, 0, sizeof(kf));
kf.prefix.aid = AID_INET;
kf.prefix.v4.s_addr = kr->prefix.s_addr;
kf.nexthop.aid = AID_INET;
kf.nexthop.v4.s_addr = kr->nexthop.s_addr;
strlcpy(kf.label, rtlabel_id2name(kr->labelid), sizeof(kf.label));
kf.flags = kr->flags;
kf.ifindex = kr->ifindex;
kf.prefixlen = kr->prefixlen;
kf.priority = kr->priority;
kf.mplslabel = kr->mplslabel;
return (&kf);
}
struct kroute_full *
kr6_tofull(struct kroute6 *kr6)
{
static struct kroute_full kf;
memset(&kf, 0, sizeof(kf));
kf.prefix.aid = AID_INET6;
kf.prefix.v6 = kr6->prefix;
kf.prefix.scope_id = kr6->prefix_scope_id;
kf.nexthop.aid = AID_INET6;
kf.nexthop.v6 = kr6->nexthop;
kf.nexthop.scope_id = kr6->nexthop_scope_id;
strlcpy(kf.label, rtlabel_id2name(kr6->labelid), sizeof(kf.label));
kf.flags = kr6->flags;
kf.ifindex = kr6->ifindex;
kf.prefixlen = kr6->prefixlen;
kf.priority = kr6->priority;
kf.mplslabel = kr6->mplslabel;
return (&kf);
}
/*
* RB-tree compare functions
*/
/*
 * Ordering function for the IPv4 kroute tree: by prefix (host byte order),
 * then prefix length, then priority.  A priority of RTP_ANY on either side
 * compares equal to any priority.
 */
int
kroute_compare(struct kroute *a, struct kroute *b)
{
	const uint32_t	pa = ntohl(a->prefix.s_addr);
	const uint32_t	pb = ntohl(b->prefix.s_addr);

	if (pa != pb)
		return (pa < pb ? -1 : 1);

	if (a->prefixlen != b->prefixlen)
		return (a->prefixlen < b->prefixlen ? -1 : 1);

	/* if the priority is RTP_ANY finish on the first address hit */
	if (a->priority == RTP_ANY || b->priority == RTP_ANY)
		return (0);

	if (a->priority != b->priority)
		return (a->priority < b->priority ? -1 : 1);

	return (0);
}
/*
 * Ordering function for the IPv6 kroute tree: by the 16 address bytes,
 * then prefix scope id, then prefix length, then priority.  A priority of
 * RTP_ANY on either side compares equal to any priority.
 */
int
kroute6_compare(struct kroute6 *a, struct kroute6 *b)
{
	int	diff;

	/* bytewise unsigned comparison of the two addresses */
	diff = memcmp(a->prefix.s6_addr, b->prefix.s6_addr, 16);
	if (diff != 0)
		return (diff < 0 ? -1 : 1);

	if (a->prefix_scope_id != b->prefix_scope_id)
		return (a->prefix_scope_id < b->prefix_scope_id ? -1 : 1);

	if (a->prefixlen != b->prefixlen)
		return (a->prefixlen < b->prefixlen ? -1 : 1);

	/* if the priority is RTP_ANY finish on the first address hit */
	if (a->priority == RTP_ANY || b->priority == RTP_ANY)
		return (0);

	if (a->priority != b->priority)
		return (a->priority < b->priority ? -1 : 1);

	return (0);
}
/*
 * Ordering function for the nexthop tree: first by address family id, then
 * by address.  Aborts via fatalx() for an address family other than
 * AID_INET or AID_INET6.
 */
int
knexthop_compare(struct knexthop *a, struct knexthop *b)
{
	uint32_t	va, vb;
	int		diff;

	if (a->nexthop.aid != b->nexthop.aid)
		return (b->nexthop.aid - a->nexthop.aid);

	switch (a->nexthop.aid) {
	case AID_INET:
		va = ntohl(a->nexthop.v4.s_addr);
		vb = ntohl(b->nexthop.v4.s_addr);
		if (va != vb)
			return (va < vb ? -1 : 1);
		break;
	case AID_INET6:
		/* bytewise unsigned comparison of the two addresses */
		diff = memcmp(a->nexthop.v6.s6_addr, b->nexthop.v6.s6_addr,
		    16);
		if (diff != 0)
			return (diff < 0 ? -1 : 1);
		break;
	default:
		fatalx("%s: unknown AF", __func__);
	}

	return (0);
}
/*
 * Ordering function for redistribution nodes: by address family id, then
 * prefix length, then prefix address, then rd.  Aborts via fatalx() for an
 * address family other than AID_INET or AID_INET6.
 */
int
kredist_compare(struct kredist_node *a, struct kredist_node *b)
{
	uint32_t	va, vb;
	int		diff;

	if (a->prefix.aid != b->prefix.aid)
		return (b->prefix.aid - a->prefix.aid);

	if (a->prefixlen != b->prefixlen)
		return (a->prefixlen < b->prefixlen ? -1 : 1);

	switch (a->prefix.aid) {
	case AID_INET:
		va = ntohl(a->prefix.v4.s_addr);
		vb = ntohl(b->prefix.v4.s_addr);
		if (va != vb)
			return (va < vb ? -1 : 1);
		break;
	case AID_INET6:
		/* bytewise unsigned comparison of the two addresses */
		diff = memcmp(a->prefix.v6.s6_addr, b->prefix.v6.s6_addr, 16);
		if (diff != 0)
			return (diff < 0 ? -1 : 1);
		break;
	default:
		fatalx("%s: unknown AF", __func__);
	}

	if (a->rd != b->rd)
		return (a->rd < b->rd ? -1 : 1);

	return (0);
}
/*
 * Ordering function for the interface tree, keyed on ifindex only.
 * Yields zero for equal indexes and a non-zero difference otherwise.
 */
int
kif_compare(struct kif *a, struct kif *b)
{
	return b->ifindex - a->ifindex;
}
/*
* tree management functions
*/
/*
 * Look up the IPv4 route for prefix/prefixlen with priority prio in kt.
 *
 * With prio == RTP_ANY every priority matches; in that case the lookup
 * steps backwards through the tree and returns the first node in tree
 * order that still compares equal.  Returns NULL if nothing matches.
 */
struct kroute *
kroute_find(struct ktable *kt, const struct bgpd_addr *prefix,
    uint8_t prefixlen, uint8_t prio)
{
	struct kroute	 key;
	struct kroute	*hit, *prev;

	/* only the fields read by kroute_compare() are filled in */
	key.prefix = prefix->v4;
	key.prefixlen = prefixlen;
	key.priority = prio;

	hit = RB_FIND(kroute_tree, &kt->krt, &key);
	if (hit == NULL || prio != RTP_ANY)
		return (hit);

	/* wildcard lookup: rewind while the predecessor still matches */
	for (prev = RB_PREV(kroute_tree, &kt->krt, hit);
	    prev != NULL && kroute_compare(&key, prev) == 0;
	    prev = RB_PREV(kroute_tree, &kt->krt, hit))
		hit = prev;

	return (hit);
}
/*
 * Walk the multipath list starting at kr and return the entry matching kf.
 *
 * When kf has F_CONNECTED set the entries are matched on ifindex, otherwise
 * on the IPv4 nexthop address.  Returns NULL when no entry matches.
 * kr must not be NULL.
 */
struct kroute *
kroute_matchgw(struct kroute *kr, struct kroute_full *kf)
{
	const int	by_ifindex = (kf->flags & F_CONNECTED) != 0;
	const in_addr_t	gateway = kf->nexthop.v4.s_addr;

	do {
		if (by_ifindex) {
			if (kr->ifindex == kf->ifindex)
				return (kr);
		} else if (kr->nexthop.s_addr == gateway)
			return (kr);
	} while ((kr = kr->next) != NULL);

	return (NULL);
}
/*
 * Create a kroute/kroute6 from kf and insert it into the tree of kt.
 *
 * VPN prefixes must carry exactly one MPLS label (labellen == 3); the three
 * label bytes are packed into the upper 24 bits of a 32-bit word and stored
 * in network byte order.  A route whose key already exists in the tree is
 * appended to that node's multipath list.  Routes flagged F_BGPD are pushed
 * to the kernel with RTM_ADD and marked F_BGPD_INSERTED on success.  For
 * all other routes the matching nexthops are revalidated and the route is
 * offered for redistribution, once per multipath group.
 *
 * Returns 0 on success, -1 on allocation failure or an unusable label stack.
 */
int
kroute_insert(struct ktable *kt, struct kroute_full *kf)
{
	struct kroute	*kr, *krm;
	struct kroute6	*kr6, *kr6m;
	struct knexthop	*n;
	uint32_t	 mplslabel = 0;
	int		 multipath = 0;

	if (kf->prefix.aid == AID_VPN_IPv4 ||
	    kf->prefix.aid == AID_VPN_IPv6) {
		/* only a single MPLS label is supported for now */
		if (kf->prefix.labellen != 3) {
			log_warnx("%s/%u does not have a single label",
			    log_addr(&kf->prefix), kf->prefixlen);
			return -1;
		}
		/*
		 * Widen each byte to uint32_t before shifting: shifting the
		 * int-promoted byte by 24 would overflow a signed int for
		 * values >= 0x80.
		 */
		mplslabel = ((uint32_t)kf->prefix.labelstack[0] << 24) |
		    ((uint32_t)kf->prefix.labelstack[1] << 16) |
		    ((uint32_t)kf->prefix.labelstack[2] << 8);
	}

	switch (kf->prefix.aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		if ((kr = calloc(1, sizeof(*kr))) == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		kr->flags = kf->flags;
		kr->prefix = kf->prefix.v4;
		kr->prefixlen = kf->prefixlen;
		/* nexthop stays zero (from calloc) unless it is IPv4 */
		if (kf->nexthop.aid == AID_INET)
			kr->nexthop = kf->nexthop.v4;

		if (kf->prefix.aid == AID_VPN_IPv4) {
			kr->flags |= F_MPLS;
			kr->mplslabel = htonl(mplslabel);
		}
		kr->ifindex = kf->ifindex;
		kr->priority = kf->priority;
		kr->labelid = rtlabel_name2id(kf->label);

		if ((krm = RB_INSERT(kroute_tree, &kt->krt, kr)) != NULL) {
			/* multipath route, add at end of list */
			while (krm->next != NULL)
				krm = krm->next;
			krm->next = kr;
			multipath = 1;
		}

		if (kf->flags & F_BGPD)
			if (send_rtmsg(RTM_ADD, kt, kf))
				kr->flags |= F_BGPD_INSERTED;
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		if ((kr6 = calloc(1, sizeof(*kr6))) == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		kr6->flags = kf->flags;
		kr6->prefix = kf->prefix.v6;
		kr6->prefix_scope_id = kf->prefix.scope_id;
		kr6->prefixlen = kf->prefixlen;
		if (kf->nexthop.aid == AID_INET6) {
			kr6->nexthop = kf->nexthop.v6;
			kr6->nexthop_scope_id = kf->nexthop.scope_id;
		} else
			kr6->nexthop = in6addr_any;

		if (kf->prefix.aid == AID_VPN_IPv6) {
			kr6->flags |= F_MPLS;
			kr6->mplslabel = htonl(mplslabel);
		}
		kr6->ifindex = kf->ifindex;
		kr6->priority = kf->priority;
		kr6->labelid = rtlabel_name2id(kf->label);

		if ((kr6m = RB_INSERT(kroute6_tree, &kt->krt6, kr6)) != NULL) {
			/* multipath route, add at end of list */
			while (kr6m->next != NULL)
				kr6m = kr6m->next;
			kr6m->next = kr6;
			multipath = 1;
		}

		if (kf->flags & F_BGPD)
			if (send_rtmsg(RTM_ADD, kt, kf))
				kr6->flags |= F_BGPD_INSERTED;
		break;
	}

	/* XXX this is wrong for nexthop validated via BGP */
	if (!(kf->flags & F_BGPD)) {
		RB_FOREACH(n, knexthop_tree, KT2KNT(kt))
			if (prefix_compare(&kf->prefix, &n->nexthop,
			    kf->prefixlen) == 0)
				knexthop_validate(kt, n);

		/* redistribute multipath routes only once */
		if (!multipath)
			kr_redistribute(IMSG_NETWORK_ADD, kt, kf);
	}

	return (0);
}
/*
 * Remove one IPv4 route described by kf from kt.
 *
 * With any != 0 the head of the multipath list is removed, otherwise the
 * list entry selected by kroute_matchgw().  On success *kf is overwritten
 * with the full description of the removed route.
 *
 * Returns 1 if other multipath entries remain, 0 if the removed route was
 * the only entry, -1 if the route was not found or its F_BGPD flag does
 * not match, and -2 on any other failure.
 */
static int
kroute4_remove(struct ktable *kt, struct kroute_full *kf, int any)
{
struct kroute *kr, *krm;
struct knexthop *n;
int multipath = 1;
if ((kr = kroute_find(kt, &kf->prefix, kf->prefixlen,
kf->priority)) == NULL)
return (-1);
/* the stored route and the request must agree on F_BGPD */
if ((kr->flags & F_BGPD) != (kf->flags & F_BGPD)) {
log_warnx("%s: wrong type for %s/%u", __func__,
log_addr(&kf->prefix), kf->prefixlen);
if (!(kf->flags & F_BGPD))
kr->flags &= ~F_BGPD_INSERTED;
return (-1);
}
/* get the correct route to remove */
krm = kr;
if (!any) {
if ((krm = kroute_matchgw(kr, kf)) == NULL) {
log_warnx("delete %s/%u: route not found",
log_addr(&kf->prefix), kf->prefixlen);
return (-2);
}
}
if (krm == kr) {
/* head element */
RB_REMOVE(kroute_tree, &kt->krt, krm);
if (krm->next != NULL) {
/* the next list entry becomes the new tree node */
kr = krm->next;
if (RB_INSERT(kroute_tree, &kt->krt, kr) != NULL) {
log_warnx("%s: failed to add %s/%u",
__func__, inet_ntoa(kr->prefix),
kr->prefixlen);
return (-2);
}
} else {
multipath = 0;
}
} else {
/* somewhere in the list */
while (kr->next != krm && kr->next != NULL)
kr = kr->next;
if (kr->next == NULL) {
log_warnx("%s: multipath list corrupted for %s/%u",
__func__, inet_ntoa(kr->prefix), kr->prefixlen);
return (-2);
}
/* unlink krm from the list */
kr->next = krm->next;
}
/* check whether a nexthop depends on this kroute */
if (krm->flags & F_NEXTHOP) {
RB_FOREACH(n, knexthop_tree, KT2KNT(kt)) {
if (n->kroute == krm)
knexthop_validate(kt, n);
}
}
/* hand the removed route back to the caller before freeing it */
*kf = *kr_tofull(krm);
rtlabel_unref(krm->labelid);
free(krm);
return (multipath);
}
/*
 * Remove one IPv6 route described by kf from kt.
 *
 * With any != 0 the head of the multipath list is removed, otherwise the
 * list entry selected by kroute6_matchgw().  On success *kf is overwritten
 * with the full description of the removed route.
 *
 * Returns 1 if other multipath entries remain, 0 if the removed route was
 * the only entry, -1 if the route was not found or its F_BGPD flag does
 * not match, and -2 on any other failure.
 */
static int
kroute6_remove(struct ktable *kt, struct kroute_full *kf, int any)
{
struct kroute6 *kr, *krm;
struct knexthop *n;
int multipath = 1;
if ((kr = kroute6_find(kt, &kf->prefix, kf->prefixlen,
kf->priority)) == NULL)
return (-1);
/* the stored route and the request must agree on F_BGPD */
if ((kr->flags & F_BGPD) != (kf->flags & F_BGPD)) {
log_warnx("%s: wrong type for %s/%u", __func__,
log_addr(&kf->prefix), kf->prefixlen);
if (!(kf->flags & F_BGPD))
kr->flags &= ~F_BGPD_INSERTED;
return (-1);
}
/* get the correct route to remove */
krm = kr;
if (!any) {
if ((krm = kroute6_matchgw(kr, kf)) == NULL) {
log_warnx("delete %s/%u: route not found",
log_addr(&kf->prefix), kf->prefixlen);
return (-2);
}
}
if (krm == kr) {
/* head element */
RB_REMOVE(kroute6_tree, &kt->krt6, krm);
if (krm->next != NULL) {
/* the next list entry becomes the new tree node */
kr = krm->next;
if (RB_INSERT(kroute6_tree, &kt->krt6, kr) != NULL) {
log_warnx("%s: failed to add %s/%u", __func__,
log_in6addr(&kr->prefix), kr->prefixlen);
return (-2);
}
} else {
multipath = 0;
}
} else {
/* somewhere in the list */
while (kr->next != krm && kr->next != NULL)
kr = kr->next;
if (kr->next == NULL) {
log_warnx("%s: multipath list corrupted for %s/%u",
__func__, log_in6addr(&kr->prefix), kr->prefixlen);
return (-2);
}
/* unlink krm from the list */
kr->next = krm->next;
}
/* check whether a nexthop depends on this kroute */
if (krm->flags & F_NEXTHOP) {
RB_FOREACH(n, knexthop_tree, KT2KNT(kt)) {
if (n->kroute == krm)
knexthop_validate(kt, n);
}
}
/* hand the removed route back to the caller before freeing it */
*kf = *kr6_tofull(krm);
rtlabel_unref(krm->labelid);
free(krm);
return (multipath);
}
/*
 * Remove the route described by kf from kt, dispatching on address family.
 *
 * A negative helper result r is returned as r + 1 (so "not found" becomes
 * 0 and other failures become -1).  After a successful removal the route
 * is deleted from the kernel if it carries F_BGPD_INSERTED, and non-BGP
 * routes are withdrawn from redistribution once the last multipath entry
 * is gone.  Returns -1 for an unsupported address family.
 */
int
kroute_remove(struct ktable *kt, struct kroute_full *kf, int any)
{
	int	remaining;

	switch (kf->prefix.aid) {
	case AID_INET:
	case AID_VPN_IPv4:
		remaining = kroute4_remove(kt, kf, any);
		break;
	case AID_INET6:
	case AID_VPN_IPv6:
		remaining = kroute6_remove(kt, kf, any);
		break;
	default:
		log_warnx("%s: not handled AID", __func__);
		return (-1);
	}

	if (remaining < 0)
		return (remaining + 1);

	/* kf now describes the route that was actually removed */
	if (kf->flags & F_BGPD_INSERTED)
		send_rtmsg(RTM_DELETE, kt, kf);

	/* remove only once all multipath routes are gone */
	if (remaining == 0 && !(kf->flags & F_BGPD))
		kr_redistribute(IMSG_NETWORK_REMOVE, kt, kf);

	return (0);
}
/*
 * Empty the IPv4 route tree of kt by repeatedly removing its smallest node.
 */
void
kroute_clear(struct ktable *kt)
{
	struct kroute	*first;

	for (;;) {
		first = RB_MIN(kroute_tree, &kt->krt);
		if (first == NULL)
			break;
		kroute_remove(kt, kr_tofull(first), 1);
	}
}
/*
 * Look up the IPv6 route for prefix/prefixlen with priority prio in kt.
 *
 * With prio == RTP_ANY every priority matches; in that case the lookup
 * steps backwards through the tree and returns the first node in tree
 * order that still compares equal.  Returns NULL if nothing matches.
 */
struct kroute6 *
kroute6_find(struct ktable *kt, const struct bgpd_addr *prefix,
    uint8_t prefixlen, uint8_t prio)
{
	struct kroute6	 key;
	struct kroute6	*hit, *prev;

	/* only the fields read by kroute6_compare() are filled in */
	key.prefix = prefix->v6;
	key.prefix_scope_id = prefix->scope_id;
	key.prefixlen = prefixlen;
	key.priority = prio;

	hit = RB_FIND(kroute6_tree, &kt->krt6, &key);
	if (hit == NULL || prio != RTP_ANY)
		return (hit);

	/* wildcard lookup: rewind while the predecessor still matches */
	for (prev = RB_PREV(kroute6_tree, &kt->krt6, hit);
	    prev != NULL && kroute6_compare(&key, prev) == 0;
	    prev = RB_PREV(kroute6_tree, &kt->krt6, hit))
		hit = prev;

	return (hit);
}
/*
 * Walk the multipath list starting at kr and return the entry matching kf.
 *
 * When kf has F_CONNECTED set the entries are matched on ifindex, otherwise
 * on the IPv6 nexthop address together with its scope id.  Returns NULL
 * when no entry matches.  kr must not be NULL.
 */
struct kroute6 *
kroute6_matchgw(struct kroute6 *kr, struct kroute_full *kf)
{
	const int	by_ifindex = (kf->flags & F_CONNECTED) != 0;

	do {
		if (by_ifindex) {
			if (kr->ifindex == kf->ifindex)
				return (kr);
		} else if (memcmp(&kr->nexthop, &kf->nexthop.v6,
		    sizeof(kr->nexthop)) == 0 &&
		    kr->nexthop_scope_id == kf->nexthop.scope_id)
			return (kr);
	} while ((kr = kr->next) != NULL);

	return (NULL);
}
/*
 * Empty the IPv6 route tree of kt by repeatedly removing its smallest node.
 */
void
kroute6_clear(struct ktable *kt)
{
	struct kroute6	*first;

	for (;;) {
		first = RB_MIN(kroute6_tree, &kt->krt6);
		if (first == NULL)
			break;
		kroute_remove(kt, kr6_tofull(first), 1);
	}
}
/*
 * Find the nexthop entry for addr in the nexthop tree reached through
 * KT2KNT(kt).  Returns NULL when addr is not tracked.
 */
struct knexthop *
knexthop_find(struct ktable *kt, struct bgpd_addr *addr)
{
	struct knexthop	key;

	/* build a zeroed search key that carries only the address */
	memset(&key, 0, sizeof(key));
	memcpy(&key.nexthop, addr, sizeof(key.nexthop));

	return (RB_FIND(knexthop_tree, KT2KNT(kt), &key));
}
/*
 * Add kn to the nexthop tree and validate it right away.
 *
 * If an equal entry is already present kn is freed and -1 is returned;
 * otherwise the function returns 0.
 */
int
knexthop_insert(struct ktable *kt, struct knexthop *kn)
{
	if (RB_INSERT(knexthop_tree, KT2KNT(kt), kn) == NULL) {
		knexthop_validate(kt, kn);
		return (0);
	}

	/* duplicate: the caller loses ownership either way */
	log_warnx("%s: failed for %s", __func__,
	    log_addr(&kn->nexthop));
	free(kn);
	return (-1);
}
/*
 * Detach kn from the route it resolves through, take it out of the nexthop
 * tree and free it.
 */
void
knexthop_remove(struct ktable *kt, struct knexthop *kn)
{
	/* must happen first: the detach step reads kn->kroute */
	kroute_detach_nexthop(kt, kn);

	RB_REMOVE(knexthop_tree, KT2KNT(kt), kn);
	free(kn);
}
/*
 * Remove and free every entry of the nexthop tree reached through
 * KT2KNT(kt).
 */
void
knexthop_clear(struct ktable *kt)
{
	struct knexthop	*first;

	for (;;) {
		first = RB_MIN(knexthop_tree, KT2KNT(kt));
		if (first == NULL)
			break;
		knexthop_remove(kt, first);
	}
}
/*
 * Look up the interface with the given ifindex in the global interface
 * tree.  Returns NULL when the index is unknown.
 */
struct kif *
kif_find(int ifindex)
{
	struct kif	key;

	/* zeroed search key carrying only the index */
	memset(&key, 0, sizeof(key));
	key.ifindex = ifindex;

	return (RB_FIND(kif_tree, &kit, &key));
}
/*
 * Add kif to the global interface tree.
 *
 * If an entry with the same key already exists kif is freed and -1 is
 * returned; otherwise the function returns 0.
 */
int
kif_insert(struct kif *kif)
{
	if (RB_INSERT(kif_tree, &kit, kif) == NULL)
		return (0);

	/* duplicate: the caller loses ownership either way */
	log_warnx("RB_INSERT(kif_tree, &kit, kif)");
	free(kif);
	return (-1);
}
/*
 * Retire an interface: clear IFF_UP, re-run nexthop tracking for its
 * ifindex in the table of its rdomain (if that table exists), then remove
 * the entry from the interface tree and free it.  Always returns 0.
 */
int
kif_remove(struct kif *kif)
{
	struct ktable	*table;

	kif->flags &= ~IFF_UP;

	/*
	 * TODO, remove all kroutes using this interface,
	 * the kernel does this for us but better to do it
	 * here as well.
	 */

	table = ktable_get(kif->rdomain);
	if (table != NULL)
		knexthop_track(table, kif->ifindex);

	RB_REMOVE(kif_tree, &kit, kif);
	free(kif);

	return (0);
}
/*
 * Remove and free every entry of the global interface tree.
 */
void
kif_clear(void)
{
	struct kif	*first;

	for (;;) {
		first = RB_MIN(kif_tree, &kit);
		if (first == NULL)
			break;
		kif_remove(first);
	}
}
/*
* nexthop validation
*/
/*
 * Return 1 if nexthops via this interface may be used, 0 otherwise.
 *
 * The interface must be IFF_UP and its link must not be reported as
 * LINK_STATE_DOWN.  We treat link_state == LINK_STATE_UNKNOWN as valid,
 * not all interfaces have a concept of "link state" and/or do not report
 * up.
 */
static int
kif_validate(struct kif *kif)
{
	return ((kif->flags & IFF_UP) &&
	    kif->link_state != LINK_STATE_DOWN);
}
/*
 * Compute the "depend on" state of an interface.
 *
 * Returns 0 when the interface is not IFF_UP, and 0 for a carp interface
 * whose link state is LINK_STATE_UNKNOWN.  In every other case the result
 * is LINK_STATE_IS_UP() applied to the link state.
 */
static int
kif_depend_state(struct kif *kif)
{
	const int	carp_unknown = kif->if_type == IFT_CARP &&
			    kif->link_state == LINK_STATE_UNKNOWN;

	if (!(kif->flags & IFF_UP) || carp_unknown)
		return (0);

	return LINK_STATE_IS_UP(kif->link_state);
}
/*
 * Decide whether an IPv4 route can serve as a valid nexthop route.
 *
 * Reject and blackhole routes are never valid.  Otherwise the answer is
 * the nh_reachable state of the route's interface; a route whose interface
 * is unknown counts as valid (with a warning if ifindex is non-zero).
 */
int
kroute_validate(struct kroute *kr)
{
	struct kif	*iface;

	if (kr->flags & (F_REJECT | F_BLACKHOLE))
		return (0);

	iface = kif_find(kr->ifindex);
	if (iface != NULL)
		return (iface->nh_reachable);

	if (kr->ifindex)
		log_warnx("%s: interface with index %d not found, "
		    "referenced from route for %s/%u", __func__,
		    kr->ifindex, inet_ntoa(kr->prefix),
		    kr->prefixlen);
	return (1);
}
/*
 * Decide whether an IPv6 route can serve as a valid nexthop route.
 *
 * Reject and blackhole routes are never valid.  Otherwise the answer is
 * the nh_reachable state of the route's interface; a route whose interface
 * is unknown counts as valid (with a warning if ifindex is non-zero).
 */
int
kroute6_validate(struct kroute6 *kr)
{
	struct kif	*iface;

	if (kr->flags & (F_REJECT | F_BLACKHOLE))
		return (0);

	iface = kif_find(kr->ifindex);
	if (iface != NULL)
		return (iface->nh_reachable);

	if (kr->ifindex)
		log_warnx("%s: interface with index %d not found, "
		    "referenced from route for %s/%u", __func__,
		    kr->ifindex, log_in6addr(&kr->prefix),
		    kr->prefixlen);
	return (1);
}
int
knexthop_true_nexthop(struct ktable *kt, struct kroute_full *kf)
{
struct bgpd_addr gateway = { 0 };
struct knexthop *kn;
struct kroute *kr;
struct kroute6 *kr6;
/*
* Ignore the nexthop for VPN routes. The gateway is forced
* to an mpe(4) interface route using an MPLS label.
*/
switch (kf->prefix.aid) {
case AID_VPN_IPv4:
case AID_VPN_IPv6:
return 1;
}
kn = knexthop_find(kt, &kf->nexthop);
if (kn == NULL) {
log_warnx("%s: nexthop %s not found", __func__,
log_addr(&kf->nexthop));
return 0;
}
if (kn->kroute == NULL)
return 0;
switch (kn->nexthop.aid) {
case AID_INET:
kr = kn->kroute;
if (kr->flags & F_CONNECTED)
return 1;
gateway.aid = AID_INET;
gateway.v4.s_addr = kr->nexthop.s_addr;
break;
case AID_INET6:
kr6 = kn->kroute;
if (kr6->flags & F_CONNECTED)
return 1;
gateway.aid = AID_INET6;
gateway.v6 = kr6->nexthop;
gateway.scope_id = kr6->nexthop_scope_id;
break;
}
kf->nexthop = gateway;
return 1;
}
/*
 * Re-resolve the nexthop kn.
 *
 * The nexthop is detached from its current route, looked up again in the
 * nexthop table of kt (kt->nhtableid) and attached to the matching route,
 * which is then flagged F_NEXTHOP.  An update is sent only when the
 * resolving route differs from the previous one.  Aborts via fatalx() if
 * the nexthop table no longer exists.
 */
void
knexthop_validate(struct ktable *kt, struct knexthop *kn)
{
	void		*before, *after;
	struct kroute	*r4;
	struct kroute6	*r6;

	before = kn->kroute;
	kroute_detach_nexthop(kt, kn);

	/* from here on work on the table that holds the nexthop routes */
	kt = ktable_get(kt->nhtableid);
	if (kt == NULL)
		fatalx("%s: lost nexthop routing table", __func__);

	switch (kn->nexthop.aid) {
	case AID_INET:
		r4 = kroute_match(kt, &kn->nexthop, 0);
		if (r4 != NULL) {
			kn->kroute = r4;
			kn->ifindex = r4->ifindex;
			r4->flags |= F_NEXTHOP;
		}
		after = r4;
		break;
	case AID_INET6:
		r6 = kroute6_match(kt, &kn->nexthop, 0);
		if (r6 != NULL) {
			kn->kroute = r6;
			kn->ifindex = r6->ifindex;
			r6->flags |= F_NEXTHOP;
		}
		after = r6;
		break;
	default:
		return;
	}

	/*
	 * Send update if nexthop route changed under us if
	 * the route remains the same then the NH state has not
	 * changed.
	 */
	if (after != before)
		knexthop_send_update(kn);
}
/*
 * Called on interface state change: revalidate every nexthop of kt that
 * currently resolves over the interface ifindex.
 */
void
knexthop_track(struct ktable *kt, u_short ifindex)
{
	struct knexthop	*hop;

	RB_FOREACH(hop, knexthop_tree, KT2KNT(kt)) {
		if (hop->ifindex != ifindex)
			continue;
		knexthop_validate(kt, hop);
	}
}
/*
 * Called on route change: send a nexthop update for every nexthop of kt
 * whose address lies within the prefix kf->prefix/kf->prefixlen.
 */
void
knexthop_update(struct ktable *kt, struct kroute_full *kf)
{
	struct knexthop	*hop;

	RB_FOREACH(hop, knexthop_tree, KT2KNT(kt)) {
		if (prefix_compare(&kf->prefix, &hop->nexthop,
		    kf->prefixlen) != 0)
			continue;
		knexthop_send_update(hop);
	}
}
void
knexthop_send_update(struct knexthop *kn)
{
struct kroute_nexthop n;
struct kroute *kr;
struct kroute6 *kr6;
memset(&n, 0, sizeof(n));
n.nexthop = kn->nexthop;
if (kn->kroute == NULL) {
n.valid = 0; /* NH is not valid */
send_nexthop_update(&n);
return;
}
switch (kn->nexthop.aid) {
case AID_INET:
kr = kn->kroute;
n.valid = kroute_validate(kr);
n.connected = kr->flags & F_CONNECTED;
if (!n.connected) {
n.gateway.aid = AID_INET;
n.gateway.v4.s_addr = kr->nexthop.s_addr;
} else {
n.gateway = n.nexthop;
n.net.aid = AID_INET;
n.net.v4.s_addr = kr->prefix.s_addr;
n.netlen = kr->prefixlen;
}
break;
case AID_INET6:
kr6 = kn->kroute;
n.valid = kroute6_validate(kr6);
n.connected = kr6->flags & F_CONNECTED;
if (!n.connected) {
n.gateway.aid = AID_INET6;
n.gateway.v6 = kr6->nexthop;
n.gateway.scope_id = kr6->nexthop_scope_id;
} else {
n.gateway = n.nexthop;
n.net.aid = AID_INET6;
n.net.v6 = kr6->prefix;
n.net.scope_id = kr6->prefix_scope_id;
n.netlen = kr6->prefixlen;
}
break;
}
send_nexthop_update(&n);
}
struct kroute *
kroute_match(struct ktable *kt, struct bgpd_addr *key, int matchany)
{
int i;
struct kroute *kr;
struct bgpd_addr masked;
for (i = 32; i >= 0; i--) {
applymask(&masked, key, i);
if ((kr = kroute_find(kt, &masked, i, RTP_ANY)) != NULL)
if (matchany || bgpd_oknexthop(kr_tofull(kr)))
return (kr);
}
return (NULL);
}
struct kroute6 *
kroute6_match(struct ktable *kt, struct bgpd_addr *key, int matchany)
{
int i;
struct kroute6 *kr6;
struct bgpd_addr masked;
for (i = 128; i >= 0; i--) {
applymask(&masked, key, i);
if ((kr6 = kroute6_find(kt, &masked, i, RTP_ANY)) != NULL)
if (matchany || bgpd_oknexthop(kr6_tofull(kr6)))
return (kr6);
}
return (NULL);
}
/*
 * Break the link between kn and the route it resolves through.
 *
 * F_NEXTHOP is cleared on that route unless another nexthop in the tree
 * still points at it.  Afterwards kn has no route and ifindex 0.  Does
 * nothing when kn is not attached to a route.
 */
void
kroute_detach_nexthop(struct ktable *kt, struct knexthop *kn)
{
	struct knexthop	*other;
	struct kroute	*r4;
	struct kroute6	*r6;
	int		 shared = 0;

	if (kn->kroute == NULL)
		return;

	/* is any other nexthop still depending on this kroute? */
	RB_FOREACH(other, knexthop_tree, KT2KNT(kt)) {
		if (other != kn && other->kroute == kn->kroute) {
			shared = 1;
			break;
		}
	}

	if (!shared) {
		switch (kn->nexthop.aid) {
		case AID_INET:
			r4 = kn->kroute;
			r4->flags &= ~F_NEXTHOP;
			break;
		case AID_INET6:
			r6 = kn->kroute;
			r6->flags &= ~F_NEXTHOP;
			break;
		}
	}

	kn->kroute = NULL;
	kn->ifindex = 0;
}
/*
* misc helpers
*/
/*
 * Return the classful prefix length for an IPv4 address.  The thresholds
 * are the class boundaries as 32-bit values with the first octet in the
 * most significant byte.
 */
uint8_t
prefixlen_classful(in_addr_t ina)
{
	if (ina < 0x80000000U)		/* class A */
		return (8);
	if (ina < 0xc0000000U)		/* class B */
		return (16);
	if (ina < 0xe0000000U)		/* class C */
		return (24);
	if (ina < 0xf0000000U)		/* class D */
		return (4);
	return (32);			/* class E */
}
/*
 * Convert an IPv4 netmask sockaddr into a prefix length.  An all-zero mask
 * yields 0; otherwise the length is derived from the position of the
 * lowest set bit of the mask in host byte order.
 */
static uint8_t
mask2prefixlen4(struct sockaddr_in *sa_in)
{
	const in_addr_t	mask = sa_in->sin_addr.s_addr;

	return (mask == 0 ? 0 : 33 - ffs(ntohl(mask)));
}
/*
 * Convert an IPv6 netmask sockaddr into a prefix length.
 *
 * Counts leading one bits byte by byte until the first byte that is not
 * 0xff or until the end of the sockaddr as given by sin6_len.  Aborts via
 * fatalx() on a byte that is not a contiguous run of leading ones, or when
 * the resulting length exceeds the number of bits in an IPv6 address.
 */
static uint8_t
mask2prefixlen6(struct sockaddr_in6 *sa_in6)
{
	uint8_t	*ap, *ep;
	u_int	 l = 0;

	/*
	 * sin6_len is the size of the sockaddr so subtract the offset of
	 * the possibly truncated sin6_addr struct.
	 */
	ap = (uint8_t *)&sa_in6->sin6_addr;
	ep = (uint8_t *)sa_in6 + sa_in6->sin6_len;
	for (; ap < ep; ap++) {
		/* this "beauty" is adopted from sbin/route/show.c ... */
		switch (*ap) {
		case 0xff:
			l += 8;
			break;
		case 0xfe:
			l += 7;
			goto done;
		case 0xfc:
			l += 6;
			goto done;
		case 0xf8:
			l += 5;
			goto done;
		case 0xf0:
			l += 4;
			goto done;
		case 0xe0:
			l += 3;
			goto done;
		case 0xc0:
			l += 2;
			goto done;
		case 0x80:
			l += 1;
			goto done;
		case 0x00:
			goto done;
		default:
			fatalx("non contiguous inet6 netmask");
		}
	}

 done:
	/* l is unsigned, so it is printed with %u */
	if (l > sizeof(struct in6_addr) * 8)
		fatalx("%s: prefixlen %u out of bound", __func__, l);
	return (l);
}
/*
 * Convert a netmask sockaddr of address family af into a prefix length.
 * Aborts via fatalx() for families other than AF_INET and AF_INET6.
 */
uint8_t
mask2prefixlen(sa_family_t af, struct sockaddr *mask)
{
	if (af == AF_INET)
		return mask2prefixlen4((struct sockaddr_in *)mask);
	if (af == AF_INET6)
		return mask2prefixlen6((struct sockaddr_in6 *)mask);

	fatalx("%s: unsupported af", __func__);
}
/*
 * Description tables built from the LINK_STATE_DESCRIPTIONS and
 * IFM_TYPE_DESCRIPTIONS initializer macros.  get_linkstate() scans the
 * first and get_media_descr() the second, each up to an entry whose string
 * pointer is NULL.
 */
const struct if_status_description
if_status_descriptions[] = LINK_STATE_DESCRIPTIONS;
const struct ifmedia_description
ifm_type_descriptions[] = IFM_TYPE_DESCRIPTIONS;
/*
 * Map an interface type (IFT_*) to the matching media type (IFM_*).
 * Returns 0 for interface types without a mapping.
 */
uint64_t
ift2ifm(uint8_t if_type)
{
	if (if_type == IFT_ETHER)
		return (IFM_ETHER);
	if (if_type == IFT_FDDI)
		return (IFM_FDDI);
	if (if_type == IFT_CARP)
		return (IFM_CARP);
	if (if_type == IFT_IEEE80211)
		return (IFM_IEEE80211);

	return (0);
}
/*
 * Return the description string for media_type from
 * ifm_type_descriptions, or "unknown media" if the table has no entry.
 */
const char *
get_media_descr(uint64_t media_type)
{
	const struct ifmedia_description	*desc = ifm_type_descriptions;

	while (desc->ifmt_string != NULL) {
		if (desc->ifmt_word == media_type)
			return (desc->ifmt_string);
		desc++;
	}

	return ("unknown media");
}
/*
 * Return the description string for link_state on an interface of type
 * if_type.  When if_status_descriptions has no matching entry the numeric
 * state is formatted into a static buffer, which the next such call
 * overwrites.
 */
const char *
get_linkstate(uint8_t if_type, int link_state)
{
	const struct if_status_description	*desc = if_status_descriptions;
	static char				 fallback[8];

	while (desc->ifs_string != NULL) {
		if (LINK_STATE_DESC_MATCH(desc, if_type, link_state))
			return (desc->ifs_string);
		desc++;
	}

	snprintf(fallback, sizeof(fallback), "[#%d]", link_state);
	return (fallback);
}
/*
 * Round a up to the next multiple of sizeof(long).  A value that is not
 * greater than zero yields sizeof(long).  The argument is evaluated more
 * than once, so it must be free of side effects.
 */
#define ROUNDUP(a) \
((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
/*
 * Split the packed sockaddrs that follow a routing message into rti_info.
 *
 * addrs is the bitmask of present addresses and sa points at the first
 * one.  For each of the RTAX_MAX slots rti_info[i] receives the address
 * for bit i, or NULL when that bit is clear.  Consecutive addresses are
 * sa_len bytes apart, rounded up with ROUNDUP().
 */
void
get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)
{
	int	slot;

	for (slot = 0; slot < RTAX_MAX; slot++) {
		if (!(addrs & (1 << slot))) {
			rti_info[slot] = NULL;
			continue;
		}
		rti_info[slot] = sa;
		sa = (struct sockaddr *)((char *)(sa) +
		    ROUNDUP(sa->sa_len));
	}
}
/*
 * Apply an interface state change reported for ifindex.
 *
 * Logs the new state, copies flags and the relevant if_data fields into
 * the kif entry, recomputes depend_state and announces it through
 * kr_send_dependon().  If nexthop reachability over the interface changed,
 * the nexthops of the table of the interface's rdomain are re-tracked.
 * Unknown interfaces are only reported with a warning.
 */
void
if_change(u_short ifindex, int flags, struct if_data *ifd)
{
	struct ktable	*table;
	struct kif	*iface;
	uint8_t		 reachable;

	iface = kif_find(ifindex);
	if (iface == NULL) {
		log_warnx("%s: interface with index %u not found",
		    __func__, ifindex);
		return;
	}

	log_info("%s: %s: rdomain %u %s, %s, %s, %s",
	    __func__, iface->ifname, ifd->ifi_rdomain,
	    flags & IFF_UP ? "UP" : "DOWN",
	    get_media_descr(ift2ifm(ifd->ifi_type)),
	    get_linkstate(ifd->ifi_type, ifd->ifi_link_state),
	    get_baudrate(ifd->ifi_baudrate, "bps"));

	/* take over the reported state */
	iface->flags = flags;
	iface->link_state = ifd->ifi_link_state;
	iface->if_type = ifd->ifi_type;
	iface->rdomain = ifd->ifi_rdomain;
	iface->baudrate = ifd->ifi_baudrate;
	iface->depend_state = kif_depend_state(iface);

	kr_send_dependon(iface);

	reachable = kif_validate(iface);
	if (reachable == iface->nh_reachable)
		return;		/* nothing changed wrt nexthop validity */
	iface->nh_reachable = reachable;

	table = ktable_get(iface->rdomain);
	if (table != NULL)
		knexthop_track(table, ifindex);
}
/*
 * Handle an interface announce message (struct if_announcemsghdr).
 *
 * On IFAN_ARRIVAL a new kif entry with the announced index and name is
 * inserted; on IFAN_DEPARTURE the matching entry, if any, is removed.
 * Other announcement kinds are ignored.
 */
void
if_announce(void *msg)
{
	struct if_announcemsghdr	*ann = msg;
	struct kif			*iface;

	if (ann->ifan_what == IFAN_ARRIVAL) {
		iface = calloc(1, sizeof(*iface));
		if (iface == NULL) {
			log_warn("%s", __func__);
			return;
		}
		iface->ifindex = ann->ifan_index;
		strlcpy(iface->ifname, ann->ifan_name,
		    sizeof(iface->ifname));
		kif_insert(iface);
	} else if (ann->ifan_what == IFAN_DEPARTURE) {
		iface = kif_find(ann->ifan_index);
		if (iface != NULL)
			kif_remove(iface);
	}
}
/*
 * Query the MPLS label and the rdomain of the interface called name.
 *
 * *label and *rdomain are set to 0 first and only overwritten when both
 * ioctls (SIOCGETLABEL and SIOCGIFRDOMAIN) succeed.  Returns 0 on success
 * and -1 if the socket cannot be created or either ioctl fails.
 */
int
get_mpe_config(const char *name, u_int *rdomain, u_int *label)
{
	struct ifreq	ifr;
	struct shim_hdr	shim;
	int		sock, rv = -1;

	*label = 0;
	*rdomain = 0;

	sock = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sock == -1)
		return (-1);

	memset(&shim, 0, sizeof(shim));
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	/* the label is returned through the shim header */
	ifr.ifr_data = (caddr_t)&shim;
	if (ioctl(sock, SIOCGETLABEL, (caddr_t)&ifr) == -1)
		goto done;

	ifr.ifr_data = NULL;
	if (ioctl(sock, SIOCGIFRDOMAIN, (caddr_t)&ifr) == -1)
		goto done;

	*rdomain = ifr.ifr_rdomainid;
	*label = shim.shim_label;
	rv = 0;

 done:
	close(sock);
	return (rv);
}
/*
* rtsock related functions
*/
/* Cast a generic sockaddr pointer to a struct sockaddr_in6 pointer. */
#define satosin6(sa) ((struct sockaddr_in6 *)(sa))
/*
 * Build a routing message for kf and write it to the routing socket
 * kr_state.fd.
 *
 * The message consists of the rt_msghdr followed by the destination, an
 * optional gateway, the netmask and, depending on kf, an interface plus
 * MPLS label pair (F_MPLS) and a route label.  Each part is one iovec
 * entry whose length is rounded up with ROUNDUP().
 *
 * Returns 0 without doing anything when kt->fib_sync is not set, and 0
 * when the write fails.  Returns 1 when the message was written, and also
 * when an RTM_DELETE fails with ESRCH.  An RTM_CHANGE that fails with
 * ESRCH is retried as RTM_ADD.
 */
int
send_rtmsg(int action, struct ktable *kt, struct kroute_full *kf)
{
struct iovec iov[7];
struct rt_msghdr hdr;
struct sockaddr_storage prefix, nexthop, mask, ifp, label, mpls;
struct bgpd_addr netmask;
struct sockaddr *sa;
struct sockaddr_dl *dl;
struct sockaddr_mpls *mp;
struct sockaddr_rtlabel *la;
socklen_t salen;
int iovcnt = 0;
if (!kt->fib_sync)
return (0);
/* initialize header */
memset(&hdr, 0, sizeof(hdr));
hdr.rtm_version = RTM_VERSION;
hdr.rtm_type = action;
hdr.rtm_tableid = kt->rtableid;
hdr.rtm_priority = kr_state.fib_prio;
if (kf->flags & F_BLACKHOLE)
hdr.rtm_flags |= RTF_BLACKHOLE;
if (kf->flags & F_REJECT)
hdr.rtm_flags |= RTF_REJECT;
if (action == RTM_CHANGE) /* reset these flags on change */
hdr.rtm_fmask = RTF_REJECT|RTF_BLACKHOLE;
hdr.rtm_seq = kr_state.rtseq++; /* overflow doesn't matter */
hdr.rtm_msglen = sizeof(hdr);
/* adjust iovec */
iov[iovcnt].iov_base = &hdr;
iov[iovcnt++].iov_len = sizeof(hdr);
/* destination prefix */
memset(&prefix, 0, sizeof(prefix));
sa = addr2sa(&kf->prefix, 0, &salen);
sa->sa_len = salen;
#ifdef __KAME__
/* XXX need to embed the stupid scope for now */
if (sa->sa_family == AF_INET6 &&
(IN6_IS_ADDR_LINKLOCAL(&satosin6(sa)->sin6_addr) ||
IN6_IS_ADDR_MC_LINKLOCAL(&satosin6(sa)->sin6_addr) ||
IN6_IS_ADDR_MC_NODELOCAL(&satosin6(sa)->sin6_addr))) {
*(u_int16_t *)&satosin6(sa)->sin6_addr.s6_addr[2] =
htons(satosin6(sa)->sin6_scope_id);
satosin6(sa)->sin6_scope_id = 0;
}
#endif
memcpy(&prefix, sa, salen);
/* adjust header */
hdr.rtm_addrs |= RTA_DST;
hdr.rtm_msglen += ROUNDUP(salen);
/* adjust iovec */
iov[iovcnt].iov_base = &prefix;
iov[iovcnt++].iov_len = ROUNDUP(salen);
/* XXX can we even have no nexthop ??? */
if (kf->nexthop.aid != AID_UNSPEC) {
memset(&nexthop, 0, sizeof(nexthop));
sa = addr2sa(&kf->nexthop, 0, &salen);
sa->sa_len = salen;
#ifdef __KAME__
/* XXX need to embed the stupid scope for now */
if (sa->sa_family == AF_INET6 &&
(IN6_IS_ADDR_LINKLOCAL(&satosin6(sa)->sin6_addr) ||
IN6_IS_ADDR_MC_LINKLOCAL(&satosin6(sa)->sin6_addr) ||
IN6_IS_ADDR_MC_NODELOCAL(&satosin6(sa)->sin6_addr))) {
*(u_int16_t *)&satosin6(sa)->sin6_addr.s6_addr[2] =
htons(satosin6(sa)->sin6_scope_id);
satosin6(sa)->sin6_scope_id = 0;
}
#endif
memcpy(&nexthop, sa, salen);
/* adjust header */
hdr.rtm_flags |= RTF_GATEWAY;
hdr.rtm_addrs |= RTA_GATEWAY;
hdr.rtm_msglen += ROUNDUP(salen);
/* adjust iovec */
iov[iovcnt].iov_base = &nexthop;
iov[iovcnt++].iov_len = ROUNDUP(salen);
}
/* netmask: start from all ones and mask down to prefixlen bits */
memset(&netmask, 0, sizeof(netmask));
memset(&netmask.v6, 0xff, sizeof(netmask.v6));
netmask.aid = kf->prefix.aid;
applymask(&netmask, &netmask, kf->prefixlen);
memset(&mask, 0, sizeof(mask));
sa = addr2sa(&netmask, 0, &salen);
sa->sa_len = salen;
memcpy(&mask, sa, salen);
/* adjust header */
hdr.rtm_addrs |= RTA_NETMASK;
hdr.rtm_msglen += ROUNDUP(salen);
/* adjust iovec */
iov[iovcnt].iov_base = &mask;
iov[iovcnt++].iov_len = ROUNDUP(salen);
if (kf->flags & F_MPLS) {
/* need to force interface for mpe(4) routes */
memset(&ifp, 0, sizeof(ifp));
dl = (struct sockaddr_dl *)&ifp;
salen = sizeof(*dl);
dl->sdl_len = salen;
dl->sdl_family = AF_LINK;
dl->sdl_index = kf->ifindex;
/* adjust header */
hdr.rtm_addrs |= RTA_IFP;
hdr.rtm_msglen += ROUNDUP(salen);
/* adjust iovec */
iov[iovcnt].iov_base = &ifp;
iov[iovcnt++].iov_len = ROUNDUP(salen);
/* the MPLS label is passed as the RTA_SRC address */
memset(&mpls, 0, sizeof(mpls));
mp = (struct sockaddr_mpls *)&mpls;
salen = sizeof(*mp);
mp->smpls_len = salen;
mp->smpls_family = AF_MPLS;
mp->smpls_label = kf->mplslabel;
/* adjust header */
hdr.rtm_flags |= RTF_MPLS;
hdr.rtm_mpls = MPLS_OP_PUSH;
hdr.rtm_addrs |= RTA_SRC;
hdr.rtm_msglen += ROUNDUP(salen);
/* clear gateway flag since this is for mpe(4) */
hdr.rtm_flags &= ~RTF_GATEWAY;
/* adjust iovec */
iov[iovcnt].iov_base = &mpls;
iov[iovcnt++].iov_len = ROUNDUP(salen);
}
/* optional route label */
if (kf->label[0] != '\0') {
memset(&label, 0, sizeof(label));
la = (struct sockaddr_rtlabel *)&label;
salen = sizeof(struct sockaddr_rtlabel);
label.ss_len = salen;
strlcpy(la->sr_label, kf->label, sizeof(la->sr_label));
/* adjust header */
hdr.rtm_addrs |= RTA_LABEL;
hdr.rtm_msglen += ROUNDUP(salen);
/* adjust iovec */
iov[iovcnt].iov_base = &label;
iov[iovcnt++].iov_len = ROUNDUP(salen);
}
retry:
if (writev(kr_state.fd, iov, iovcnt) == -1) {
if (errno == ESRCH) {
/* route does not exist: turn a change into an add */
if (hdr.rtm_type == RTM_CHANGE) {
hdr.rtm_type = RTM_ADD;
goto retry;
} else if (hdr.rtm_type == RTM_DELETE) {
log_info("route %s/%u vanished before delete",
log_addr(&kf->prefix), kf->prefixlen);
return (1);
}
}
log_warn("%s: action %u, prefix %s/%u", __func__, hdr.rtm_type,
log_addr(&kf->prefix), kf->prefixlen);
return (0);
}
return (1);
}
/*
 * Load the kernel routing table kt->rtableid via sysctl(NET_RT_DUMP).
 *
 * Every usable route is parsed with dispatch_rtmsg_addr().  Routes with
 * priority RTP_MINE are passed to send_rtmsg(RTM_DELETE), all others are
 * inserted into kt.  Returns 0 on success, also when a table other than 0
 * does not exist (EINVAL), and -1 on sysctl or allocation failure.
 */
int
fetchtable(struct ktable *kt)
{
	int			 mib[7] = { CTL_NET, PF_ROUTE, 0, 0,
				    NET_RT_DUMP, 0, kt->rtableid };
	size_t			 len;
	char			*dump = NULL, *next, *lim;
	struct rt_msghdr	*rtm;
	struct kroute_full	 kf;

	/* first call: ask for the required buffer size */
	if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) {
		if (kt->rtableid != 0 && errno == EINVAL)
			/* table nonexistent */
			return (0);
		log_warn("%s: sysctl", __func__);
		return (-1);
	}

	if (len > 0) {
		dump = malloc(len);
		if (dump == NULL) {
			log_warn("%s", __func__);
			return (-1);
		}
		/* second call: fetch the dump itself */
		if (sysctl(mib, 7, dump, &len, NULL, 0) == -1) {
			log_warn("%s: sysctl2", __func__);
			free(dump);
			return (-1);
		}
	}

	lim = dump + len;
	for (next = dump; next < lim; next += rtm->rtm_msglen) {
		rtm = (struct rt_msghdr *)next;
		if (rtm->rtm_version != RTM_VERSION ||
		    dispatch_rtmsg_addr(rtm, &kf) == -1)
			continue;

		if (kf.priority != RTP_MINE)
			kroute_insert(kt, &kf);
		else
			send_rtmsg(RTM_DELETE, kt, &kf);
	}

	free(dump);
	return (0);
}
/*
 * Load interface information via sysctl(NET_RT_IFLIST) for ifindex and
 * insert one kif entry per RTM_IFINFO message.
 *
 * The interface name is taken from the AF_LINK sockaddr in the RTAX_IFP
 * slot, truncated to fit kif->ifname.  Returns 0 on success and -1 on
 * sysctl or allocation failure.
 */
int
fetchifs(int ifindex)
{
	/* AF does not matter but AF_INET is shorter */
	int			 mib[6] = { CTL_NET, PF_ROUTE, 0, AF_INET,
				    NET_RT_IFLIST, ifindex };
	size_t			 len, namelen;
	char			*dump, *next, *lim;
	struct if_msghdr	 ifm;
	struct kif		*iface;
	struct sockaddr		*sa, *rti_info[RTAX_MAX];
	struct sockaddr_dl	*sdl;

	/* first call: ask for the required buffer size */
	if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) {
		log_warn("%s: sysctl", __func__);
		return (-1);
	}
	dump = malloc(len);
	if (dump == NULL) {
		log_warn("%s", __func__);
		return (-1);
	}
	/* second call: fetch the list itself */
	if (sysctl(mib, 6, dump, &len, NULL, 0) == -1) {
		log_warn("%s: sysctl2", __func__);
		free(dump);
		return (-1);
	}

	lim = dump + len;
	for (next = dump; next < lim; next += ifm.ifm_msglen) {
		/* work on a copy of the header */
		memcpy(&ifm, next, sizeof(ifm));
		if (ifm.ifm_version != RTM_VERSION ||
		    ifm.ifm_type != RTM_IFINFO)
			continue;

		sa = (struct sockaddr *)(next + sizeof(ifm));
		get_rtaddrs(ifm.ifm_addrs, sa, rti_info);

		iface = calloc(1, sizeof(*iface));
		if (iface == NULL) {
			log_warn("%s", __func__);
			free(dump);
			return (-1);
		}

		iface->ifindex = ifm.ifm_index;
		iface->flags = ifm.ifm_flags;
		iface->link_state = ifm.ifm_data.ifi_link_state;
		iface->if_type = ifm.ifm_data.ifi_type;
		iface->rdomain = ifm.ifm_data.ifi_rdomain;
		iface->baudrate = ifm.ifm_data.ifi_baudrate;
		iface->nh_reachable = kif_validate(iface);
		iface->depend_state = kif_depend_state(iface);

		sa = rti_info[RTAX_IFP];
		if (sa != NULL && sa->sa_family == AF_LINK) {
			sdl = (struct sockaddr_dl *)sa;
			/* leave room for the terminating NUL */
			if (sdl->sdl_nlen >= sizeof(iface->ifname))
				namelen = sizeof(iface->ifname) - 1;
			else
				namelen = sdl->sdl_nlen;
			if (namelen > 0)
				memcpy(iface->ifname, sdl->sdl_data, namelen);
			/* string already terminated via calloc() */
		}

		kif_insert(iface);
	}

	free(dump);
	return (0);
}
/*
 * Read pending messages from the routing socket and dispatch them.
 *
 * RTM_ADD, RTM_CHANGE and RTM_DELETE messages that were not caused by this
 * process, completed without error and belong to a known table are parsed
 * and passed to kr_fib_change() or kr_fib_delete().  RTM_IFINFO and
 * RTM_IFANNOUNCE update the interface state; everything else is ignored.
 *
 * Returns 0 on success or when the read would block or was interrupted,
 * and -1 on read error, when the socket was closed, or when one of the
 * kr_fib_*() handlers fails.  Aborts via fatalx() on a truncated message.
 */
int
dispatch_rtmsg(void)
{
	char			 buf[RT_BUF_SIZE];
	ssize_t			 n;
	char			*next, *lim;
	struct rt_msghdr	*rtm;
	struct if_msghdr	 ifm;
	struct kroute_full	 kf;
	struct ktable		*kt;
	int			 mpath;

	if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) {
		if (errno == EAGAIN || errno == EINTR)
			return (0);
		log_warn("%s: read error", __func__);
		return (-1);
	}

	if (n == 0) {
		log_warnx("routing socket closed");
		return (-1);
	}

	lim = buf + n;
	for (next = buf; next < lim; next += rtm->rtm_msglen) {
		rtm = (struct rt_msghdr *)next;
		/* the length field and the whole message must be in buf */
		if (lim < next + sizeof(u_short) ||
		    lim < next + rtm->rtm_msglen)
			fatalx("%s: partial rtm in buffer", __func__);
		if (rtm->rtm_version != RTM_VERSION)
			continue;

		switch (rtm->rtm_type) {
		case RTM_ADD:
		case RTM_CHANGE:
		case RTM_DELETE:
			if (rtm->rtm_pid == kr_state.pid) /* cause by us */
				continue;

			/* failed attempts */
			if (rtm->rtm_errno || !(rtm->rtm_flags & RTF_DONE))
				continue;

			if ((kt = ktable_get(rtm->rtm_tableid)) == NULL)
				continue;

			if (dispatch_rtmsg_addr(rtm, &kf) == -1)
				continue;

			/*
			 * Derive the multipath state from this message only,
			 * so that an earlier RTF_MPATH message in the same
			 * buffer cannot leak into later ones.
			 */
			mpath = (rtm->rtm_flags & RTF_MPATH) ? 1 : 0;

			switch (rtm->rtm_type) {
			case RTM_ADD:
			case RTM_CHANGE:
				if (kr_fib_change(kt, &kf, rtm->rtm_type,
				    mpath) == -1)
					return -1;
				break;
			case RTM_DELETE:
				if (kr_fib_delete(kt, &kf, mpath) == -1)
					return -1;
				break;
			}
			break;
		case RTM_IFINFO:
			/* work on a copy of the header */
			memcpy(&ifm, next, sizeof(ifm));
			if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data);
			break;
		case RTM_IFANNOUNCE:
			if_announce(next);
			break;
		default:
			/* ignore for now */
			break;
		}
	}
	return (0);
}
/*
 * Parse the addresses of a routing message into kf.
 *
 * Fills in flags, priority (the configured kr_state.fib_prio is mapped to
 * RTP_MINE), route label, prefix, prefix length, interface index and
 * nexthop.  A route without RTF_GATEWAY, or one whose gateway is an
 * AF_LINK address, is marked F_CONNECTED.
 *
 * Returns 0 on success and -1 for messages that are skipped: ARP/ND cache,
 * broadcast and dynamic routes, messages without destination or gateway,
 * and destinations that are neither AF_INET nor AF_INET6.  Aborts via
 * fatalx() on an oversized label or an IPv6 network route without netmask.
 */
int
dispatch_rtmsg_addr(struct rt_msghdr *rtm, struct kroute_full *kf)
{
	struct sockaddr		*sa, *rti_info[RTAX_MAX];
	struct sockaddr_in	*sa_in;
	struct sockaddr_in6	*sa_in6;
	struct sockaddr_rtlabel	*label;

	sa = (struct sockaddr *)((char *)rtm + rtm->rtm_hdrlen);
	get_rtaddrs(rtm->rtm_addrs, sa, rti_info);

	/* Skip ARP/ND cache, broadcast and dynamic routes. */
	if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST|RTF_DYNAMIC))
		return (-1);

	if ((sa = rti_info[RTAX_DST]) == NULL) {
		log_warnx("route message without destination");
		return (-1);
	}

	memset(kf, 0, sizeof(*kf));

	if (rtm->rtm_flags & RTF_STATIC)
		kf->flags |= F_STATIC;
	if (rtm->rtm_flags & RTF_BLACKHOLE)
		kf->flags |= F_BLACKHOLE;
	if (rtm->rtm_flags & RTF_REJECT)
		kf->flags |= F_REJECT;

	/* adjust priority here */
	if (rtm->rtm_priority == kr_state.fib_prio)
		kf->priority = RTP_MINE;
	else
		kf->priority = rtm->rtm_priority;

	label = (struct sockaddr_rtlabel *)rti_info[RTAX_LABEL];
	if (label != NULL)
		if (strlcpy(kf->label, label->sr_label, sizeof(kf->label)) >=
		    sizeof(kf->label))
			fatalx("rtm label overflow");

	sa2addr(sa, &kf->prefix, NULL);
	switch (sa->sa_family) {
	case AF_INET:
		sa_in = (struct sockaddr_in *)rti_info[RTAX_NETMASK];
		if (rtm->rtm_flags & RTF_HOST)
			kf->prefixlen = 32;
		else if (sa_in != NULL)
			kf->prefixlen = mask2prefixlen4(sa_in);
		else
			/*
			 * The class boundaries are host byte order values
			 * while the stored prefix is in network byte order.
			 */
			kf->prefixlen =
			    prefixlen_classful(ntohl(kf->prefix.v4.s_addr));
		break;
	case AF_INET6:
		sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK];
		if (rtm->rtm_flags & RTF_HOST)
			kf->prefixlen = 128;
		else if (sa_in6 != NULL)
			kf->prefixlen = mask2prefixlen6(sa_in6);
		else
			fatalx("in6 net addr without netmask");
		break;
	default:
		return (-1);
	}

	if ((sa = rti_info[RTAX_GATEWAY]) == NULL) {
		log_warnx("route %s/%u without gateway",
		    log_addr(&kf->prefix), kf->prefixlen);
		return (-1);
	}

	kf->ifindex = rtm->rtm_index;
	if (rtm->rtm_flags & RTF_GATEWAY) {
		switch (sa->sa_family) {
		case AF_LINK:
			kf->flags |= F_CONNECTED;
			break;
		case AF_INET:
		case AF_INET6:
			sa2addr(rti_info[RTAX_GATEWAY], &kf->nexthop, NULL);
			break;
		}
	} else {
		kf->flags |= F_CONNECTED;
	}

	return (0);
}
/*
 * Handle a route deletion for table kt by forwarding kf to
 * kroute_remove().  The third kroute_remove() argument is the logical
 * negation of mpath.  Returns whatever kroute_remove() returns.
 */
int
kr_fib_delete(struct ktable *kt, struct kroute_full *kf, int mpath)
{
	int	not_mpath = (mpath == 0);

	return (kroute_remove(kt, kf, not_mpath));
}
/*
 * Apply a route add or change (type is RTM_ADD or RTM_CHANGE) described
 * by kf to table kt.
 *
 * The route is looked up by prefix, prefix length and priority:
 *  - not found: kf is inserted with kroute_insert();
 *  - found and kf carries F_BGPD: only F_BGPD_INSERTED is cleared on the
 *    existing entry;
 *  - found, mpath set: an RTM_ADD is inserted as an additional route, an
 *    RTM_CHANGE updates the entry returned by the matchgw lookup, or is
 *    inserted (with a warning) if that lookup finds nothing;
 *  - otherwise the existing entry gets the nexthop, ifindex, route label
 *    and flags from kf.  kr_redistribute() is called when the label
 *    changed and when F_CONNECTED toggled; knexthop_update() is called
 *    when the entry has F_NEXTHOP and its nexthop or flags changed.
 *
 * Prefixes that are neither AID_INET nor AID_INET6 are ignored.
 * Always returns 0.
 */
int
kr_fib_change(struct ktable *kt, struct kroute_full *kf, int type, int mpath)
{
	struct kroute		*kr4;
	struct kroute6		*kr6;
	const struct in6_addr	*gw6;
	in_addr_t		 gw4;
	uint16_t		 labelid;
	int			 nflags, prev;
	int			 was_conn, is_conn;
	int			 modified = 0, label_changed = 0;

	nflags = kf->flags;

	switch (kf->prefix.aid) {
	case AID_INET:
		kr4 = kroute_find(kt, &kf->prefix, kf->prefixlen,
		    kf->priority);
		if (kr4 == NULL) {
			kroute_insert(kt, kf);
			break;
		}
		if (kf->flags & F_BGPD) {
			kr4->flags &= ~F_BGPD_INSERTED;
			break;
		}

		/* pick the right entry of a multipath route */
		if (mpath && type == RTM_CHANGE) {
			kr4 = kroute_matchgw(kr4, kf);
			if (kr4 == NULL) {
				log_warnx("%s[change]: mpath route not found",
				    __func__);
				kroute_insert(kt, kf);
				break;
			}
		} else if (mpath && type == RTM_ADD) {
			kroute_insert(kt, kf);
			break;
		}

		/* a nexthop of another address family is stored as 0 */
		gw4 = (kf->nexthop.aid == AID_INET) ?
		    kf->nexthop.v4.s_addr : 0;
		if (kr4->nexthop.s_addr != gw4)
			modified = 1;
		kr4->nexthop.s_addr = gw4;
		kr4->ifindex = kf->ifindex;

		/* F_NEXTHOP on the existing entry is carried over */
		if (kr4->flags & F_NEXTHOP)
			nflags |= F_NEXTHOP;

		labelid = rtlabel_name2id(kf->label);
		if (labelid != kr4->labelid) {
			rtlabel_unref(kr4->labelid);
			kr4->labelid = labelid;
			label_changed = 1;
		}

		prev = kr4->flags;
		if (prev != nflags)
			modified = 1;
		kr4->flags = nflags;

		if (label_changed)
			kr_redistribute(IMSG_NETWORK_ADD, kt,
			    kr_tofull(kr4));

		/* F_CONNECTED was either gained or lost */
		was_conn = (prev & F_CONNECTED) != 0;
		is_conn = (nflags & F_CONNECTED) != 0;
		if (was_conn != is_conn)
			kr_redistribute(IMSG_NETWORK_ADD, kt,
			    kr_tofull(kr4));

		if ((kr4->flags & F_NEXTHOP) && modified)
			knexthop_update(kt, kf);
		break;
	case AID_INET6:
		kr6 = kroute6_find(kt, &kf->prefix, kf->prefixlen,
		    kf->priority);
		if (kr6 == NULL) {
			kroute_insert(kt, kf);
			break;
		}
		if (kf->flags & F_BGPD) {
			kr6->flags &= ~F_BGPD_INSERTED;
			break;
		}

		/* pick the right entry of a multipath route */
		if (mpath && type == RTM_CHANGE) {
			kr6 = kroute6_matchgw(kr6, kf);
			if (kr6 == NULL) {
				log_warnx("%s[change]: IPv6 mpath route "
				    "not found", __func__);
				kroute_insert(kt, kf);
				break;
			}
		} else if (mpath && type == RTM_ADD) {
			kroute_insert(kt, kf);
			break;
		}

		/*
		 * A nexthop of another address family is stored as the
		 * unspecified address with scope id 0; in that case only
		 * the address itself is compared for changes.
		 */
		if (kf->nexthop.aid == AID_INET6) {
			gw6 = &kf->nexthop.v6;
			if (kr6->nexthop_scope_id != kf->nexthop.scope_id)
				modified = 1;
			kr6->nexthop_scope_id = kf->nexthop.scope_id;
		} else {
			gw6 = &in6addr_any;
			kr6->nexthop_scope_id = 0;
		}
		if (memcmp(&kr6->nexthop, gw6, sizeof(struct in6_addr)) != 0)
			modified = 1;
		kr6->nexthop = *gw6;
		kr6->ifindex = kf->ifindex;

		/* F_NEXTHOP on the existing entry is carried over */
		if (kr6->flags & F_NEXTHOP)
			nflags |= F_NEXTHOP;

		labelid = rtlabel_name2id(kf->label);
		if (labelid != kr6->labelid) {
			rtlabel_unref(kr6->labelid);
			kr6->labelid = labelid;
			label_changed = 1;
		}

		prev = kr6->flags;
		if (prev != nflags)
			modified = 1;
		kr6->flags = nflags;

		if (label_changed)
			kr_redistribute(IMSG_NETWORK_ADD, kt,
			    kr6_tofull(kr6));

		/* F_CONNECTED was either gained or lost */
		was_conn = (prev & F_CONNECTED) != 0;
		is_conn = (nflags & F_CONNECTED) != 0;
		if (was_conn != is_conn)
			kr_redistribute(IMSG_NETWORK_ADD, kt,
			    kr6_tofull(kr6));

		if ((kr6->flags & F_NEXTHOP) && modified)
			knexthop_update(kt, kf);
		break;
	}

	return (0);
}