1839 lines
38 KiB
C
1839 lines
38 KiB
C
/* $OpenBSD: if_vxlan.c,v 1.99 2023/12/23 10:52:54 bluhm Exp $ */
|
|
|
|
/*
|
|
* Copyright (c) 2021 David Gwynne <dlg@openbsd.org>
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
* copyright notice and this permission notice appear in all copies.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
*/
|
|
|
|
#include "bpfilter.h"
|
|
#include "pf.h"
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/timeout.h>
|
|
#include <sys/pool.h>
|
|
#include <sys/tree.h>
|
|
#include <sys/refcnt.h>
|
|
#include <sys/smr.h>
|
|
|
|
#include <sys/socketvar.h>
|
|
|
|
#include <net/if.h>
|
|
#include <net/if_var.h>
|
|
#include <net/if_dl.h>
|
|
#include <net/if_media.h>
|
|
#include <net/if_types.h>
|
|
#include <net/route.h>
|
|
#include <net/rtable.h>
|
|
|
|
#include <netinet/in.h>
|
|
#include <netinet/in_var.h>
|
|
#include <netinet/if_ether.h>
|
|
#include <netinet/ip.h>
|
|
#include <netinet/udp.h>
|
|
#include <netinet/in_pcb.h>
|
|
#include <netinet/ip_var.h>
|
|
|
|
#ifdef INET6
|
|
#include <netinet/ip6.h>
|
|
#include <netinet6/ip6_var.h>
|
|
#include <netinet6/in6_var.h>
|
|
#endif
|
|
|
|
/* for bridge stuff */
|
|
#include <net/if_bridge.h>
|
|
#include <net/if_etherbridge.h>
|
|
|
|
#if NBPFILTER > 0
|
|
#include <net/bpf.h>
|
|
#endif
|
|
|
|
/*
|
|
* The protocol.
|
|
*/
|
|
|
|
/* IANA-assigned VXLAN UDP port (RFC 7348). */
#define VXLAN_PORT	4789

/*
 * On-the-wire VXLAN header.  Both words are in network byte order.
 */
struct vxlan_header {
	uint32_t		vxlan_flags;
#define VXLAN_F_I		(1U << 27)	/* "I" flag: VNI is valid */
	uint32_t		vxlan_id;	/* VNI in the top 24 bits */
#define VXLAN_VNI_SHIFT		8
#define VXLAN_VNI_MASK		(0xffffffU << VXLAN_VNI_SHIFT)
};

/* the VNI is a 24-bit value */
#define VXLAN_VNI_MAX		0x00ffffffU
#define VXLAN_VNI_MIN		0x00000000U
|
|
|
|
/*
|
|
* The driver.
|
|
*/
|
|
|
|
/* A tunnel endpoint address, either IPv4 or IPv6. */
union vxlan_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

struct vxlan_softc;

/*
 * A remote peer registered on a tunnel endpoint.  Peers are keyed by
 * remote address plus the expected VXLAN header (I flag and VNI) so
 * that incoming packets can be matched to the owning vxlan(4)
 * interface; an all-zeros p_addr acts as a wildcard (see vxlan_input).
 */
struct vxlan_peer {
	RBT_ENTRY(vxlan_peer)	 p_entry;

	struct vxlan_header	 p_header;	/* expected flags + VNI */
	union vxlan_addr	 p_addr;	/* remote addr, zeros = any */

	struct vxlan_softc	*p_sc;		/* owning interface, counted
						   via sc_refs */
};

RBT_HEAD(vxlan_peers, vxlan_peer);

/*
 * A tunnel endpoint: one bound UDP socket shared by all vxlan(4)
 * interfaces with the same address family, rdomain, local address,
 * and port.  The global vxlan_teps list is serialised by vxlan_lock.
 */
struct vxlan_tep {
	TAILQ_ENTRY(vxlan_tep)	 vt_entry;

	sa_family_t		 vt_af;
	unsigned int		 vt_rdomain;
	union vxlan_addr	 vt_addr;
#define vt_addr4		 vt_addr.in4
#define vt_addr6		 vt_addr.in6
	in_port_t		 vt_port;	/* network byte order */

	struct socket		*vt_so;		/* bound UDP socket */

	struct mutex		 vt_mtx;	/* protects vt_peers */
	struct vxlan_peers	 vt_peers;
};

TAILQ_HEAD(vxlan_teps, vxlan_tep);

/* how the tunnel destination was configured */
enum vxlan_tunnel_mode {
	VXLAN_TMODE_UNSET,
	VXLAN_TMODE_P2P,	/* unicast destination, no learning */
	VXLAN_TMODE_LEARNING,	/* multicast destination, learning */
	VXLAN_TMODE_ENDPOINT,	/* unset destination, no learning */
};
|
|
|
|
/* Per-interface state for a vxlan(4) interface. */
struct vxlan_softc {
	struct arpcom		 sc_ac;		/* Ethernet interface glue */
	struct etherbridge	 sc_eb;		/* learned MAC -> endpoint map */

	unsigned int		 sc_rdomain;	/* rdomain the tunnel runs in */
	sa_family_t		 sc_af;		/* AF_INET, AF_INET6, or
						   AF_UNSPEC if unconfigured */
	union vxlan_addr	 sc_src;	/* local tunnel address */
	union vxlan_addr	 sc_dst;	/* remote/group address */
	in_port_t		 sc_port;	/* UDP port, network order */
	struct vxlan_header	 sc_header;	/* prebuilt tx header */
	unsigned int		 sc_if_index0;	/* parent ifindex for
						   multicast, 0 if unset */

	struct task		 sc_dtask;	/* parent detach hook */
	void			*sc_inmulti;	/* multicast membership */

	enum vxlan_tunnel_mode	 sc_mode;
	struct vxlan_peer	*sc_ucast_peer;	/* set while running */
	struct vxlan_peer	*sc_mcast_peer;	/* LEARNING mode only */
	struct refcnt		 sc_refs;	/* held by peers in teps */

	uint16_t		 sc_df;		/* htons(IP_DF) or 0 */
	int			 sc_ttl;
	int			 sc_txhprio;
	int			 sc_rxhprio;

	struct task		 sc_send_task;	/* defers tx to softnet */
};
|
|
|
|
void		vxlanattach(int);

/* interface cloning */
static int	vxlan_clone_create(struct if_clone *, int);
static int	vxlan_clone_destroy(struct ifnet *);

/* transmit path */
static int	vxlan_output(struct ifnet *, struct mbuf *,
		    struct sockaddr *, struct rtentry *);
static int	vxlan_enqueue(struct ifnet *, struct mbuf *);
static void	vxlan_start(struct ifqueue *);
static void	vxlan_send(void *);

/* ioctl handling and interface state changes */
static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
static int	vxlan_up(struct vxlan_softc *);
static int	vxlan_down(struct vxlan_softc *);
static int	vxlan_addmulti(struct vxlan_softc *, struct ifnet *);
static void	vxlan_delmulti(struct vxlan_softc *);

/* receive path: UDP socket upcall */
static struct mbuf *
		vxlan_input(void *, struct mbuf *,
		    struct ip *, struct ip6_hdr *, void *, int);

/* tunnel configuration */
static int	vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *);
static int	vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *);
static int	vxlan_set_tunnel(struct vxlan_softc *,
		    const struct if_laddrreq *);
static int	vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *);
static int	vxlan_del_tunnel(struct vxlan_softc *);
static int	vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *);
static int	vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *);
static int	vxlan_del_vnetid(struct vxlan_softc *);
static int	vxlan_set_parent(struct vxlan_softc *,
		    const struct if_parent *);
static int	vxlan_get_parent(struct vxlan_softc *, struct if_parent *);
static int	vxlan_del_parent(struct vxlan_softc *);

/* static bridge cache entries */
static int	vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *);
static int	vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *);

static void	vxlan_detach_hook(void *);

static struct if_clone vxlan_cloner =
    IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);

/* callbacks used by the shared etherbridge code */
static int	 vxlan_eb_port_eq(void *, void *, void *);
static void	*vxlan_eb_port_take(void *, void *);
static void	 vxlan_eb_port_rele(void *, void *);
static size_t	 vxlan_eb_port_ifname(void *, char *, size_t, void *);
static void	 vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *);

static const struct etherbridge_ops vxlan_etherbridge_ops = {
	vxlan_eb_port_eq,
	vxlan_eb_port_take,
	vxlan_eb_port_rele,
	vxlan_eb_port_ifname,
	vxlan_eb_port_sa,
};

/* serialises the global tunnel endpoint list below */
static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps");
static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps);
static struct pool vxlan_endpoint_pool;	/* union vxlan_addr allocations
					   for the etherbridge map */

static inline int	vxlan_peer_cmp(const struct vxlan_peer *,
			    const struct vxlan_peer *);

RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);
|
|
|
|
/*
 * Pseudo-device attach hook: register the cloner so vxlan(4)
 * interfaces can be created with ifconfig.  count is unused.
 */
void
vxlanattach(int count)
{
	if_clone_attach(&vxlan_cloner);
}
|
|
|
|
static int
|
|
vxlan_clone_create(struct if_clone *ifc, int unit)
|
|
{
|
|
struct vxlan_softc *sc;
|
|
struct ifnet *ifp;
|
|
int error;
|
|
|
|
if (vxlan_endpoint_pool.pr_size == 0) {
|
|
pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr),
|
|
0, IPL_SOFTNET, 0, "vxlanep", NULL);
|
|
}
|
|
|
|
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
|
|
if (sc == NULL)
|
|
return (ENOMEM);
|
|
|
|
ifp = &sc->sc_ac.ac_if;
|
|
|
|
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
|
|
ifc->ifc_name, unit);
|
|
|
|
error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
|
|
&vxlan_etherbridge_ops, sc);
|
|
if (error == -1) {
|
|
free(sc, M_DEVBUF, sizeof(*sc));
|
|
return (error);
|
|
}
|
|
|
|
sc->sc_af = AF_UNSPEC;
|
|
sc->sc_txhprio = 0;
|
|
sc->sc_rxhprio = IF_HDRPRIO_OUTER;
|
|
sc->sc_df = 0;
|
|
sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL;
|
|
|
|
task_set(&sc->sc_dtask, vxlan_detach_hook, sc);
|
|
refcnt_init(&sc->sc_refs);
|
|
task_set(&sc->sc_send_task, vxlan_send, sc);
|
|
|
|
ifp->if_softc = sc;
|
|
ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
|
|
ifp->if_ioctl = vxlan_ioctl;
|
|
ifp->if_output = vxlan_output;
|
|
ifp->if_enqueue = vxlan_enqueue;
|
|
ifp->if_qstart = vxlan_start;
|
|
ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
|
|
ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
|
|
ether_fakeaddr(ifp);
|
|
|
|
if_counters_alloc(ifp);
|
|
if_attach(ifp);
|
|
ether_ifattach(ifp);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
 * Destroy a vxlan(4) interface: bring it down if running, detach it
 * from the network stack, then wait for all outstanding references
 * (held by peers on tunnel endpoints) to drain before freeing.
 */
static int
vxlan_clone_destroy(struct ifnet *ifp)
{
	struct vxlan_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		vxlan_down(sc);
	NET_UNLOCK();

	ether_ifdetach(ifp);
	if_detach(ifp);

	etherbridge_destroy(&sc->sc_eb);

	/* sleep until every vxlan_take() has been matched by a rele */
	refcnt_finalize(&sc->sc_refs, "vxlanfini");

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
|
|
|
|
/*
 * Take a reference on the softc.  Returns sc for call-chaining
 * convenience.
 */
static struct vxlan_softc *
vxlan_take(struct vxlan_softc *sc)
{
	refcnt_take(&sc->sc_refs);
	return (sc);
}
|
|
|
|
/*
 * Release a reference on the softc, waking a sleeping
 * refcnt_finalize() in vxlan_clone_destroy() if this was the last.
 */
static void
vxlan_rele(struct vxlan_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
|
|
|
|
/*
 * Encapsulate an outgoing Ethernet frame: choose the remote endpoint,
 * prepend the VXLAN and UDP headers, tag the packet for loop
 * detection by vxlan_output(), then call the af-specific ip_encap
 * callback to prepend the IP/IPv6 header.  Consumes m on error and
 * returns NULL; otherwise returns the fully encapsulated mbuf.
 */
static struct mbuf *
vxlan_encap(struct vxlan_softc *sc, struct mbuf *m,
    struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *,
    const union vxlan_addr *, uint8_t))
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct m_tag *mtag;
	struct mbuf *m0;
	union vxlan_addr gateway;
	const union vxlan_addr *endpoint;
	struct vxlan_header *vh;
	struct udphdr *uh;
	int prio;
	uint8_t tos;

	if (sc->sc_mode == VXLAN_TMODE_UNSET)
		goto drop;

	if (sc->sc_mode == VXLAN_TMODE_P2P)
		endpoint = &sc->sc_dst;
	else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */
		struct ether_header *eh = mtod(m, struct ether_header *);

		/* look up a learned endpoint for the destination MAC */
		smr_read_enter();
		endpoint = etherbridge_resolve_ea(&sc->sc_eb,
		    (struct ether_addr *)eh->ether_dhost);
		if (endpoint != NULL) {
			/* copy out before leaving the SMR read section */
			gateway = *endpoint;
			endpoint = &gateway;
		}
		smr_read_leave();

		if (endpoint == NULL) {
			if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
				goto drop;

			/* "flood" to unknown destinations */
			endpoint = &sc->sc_dst;
		}
	}

	/* force prepend mbuf because of payload alignment */
	m0 = m_get(M_DONTWAIT, m->m_type);
	if (m0 == NULL)
		goto drop;

	m_align(m0, 0);
	m0->m_len = 0;

	M_MOVE_PKTHDR(m0, m);
	m0->m_next = m;

	/* m_prepend frees the chain itself on failure */
	m = m_prepend(m0, sizeof(*vh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	vh = mtod(m, struct vxlan_header *);
	*vh = sc->sc_header;

	m = m_prepend(m, sizeof(*uh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	uh = mtod(m, struct udphdr *);
	uh->uh_sport = sc->sc_port; /* XXX */
	uh->uh_dport = sc->sc_port;
	htobem16(&uh->uh_ulen, m->m_pkthdr.len);
	uh->uh_sum = htons(0);	/* filled in by M_UDP_CSUM_OUT below */

	SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);

	/* record our ifindex so vxlan_output() can detect loops */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL)
		goto drop;

	*(int *)(mtag + 1) = ifp->if_index;
	m_tag_prepend(m, mtag);

	prio = sc->sc_txhprio;
	if (prio == IF_HDRPRIO_PACKET)
		prio = m->m_pkthdr.pf.prio;
	tos = IFQ_PRIO2TOS(prio);

	CLR(m->m_flags, M_BCAST|M_MCAST);
	m->m_pkthdr.ph_rtableid = sc->sc_rdomain;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	return ((*ip_encap)(sc, m, endpoint, tos));
drop:
	m_freem(m);
	return (NULL);
}
|
|
|
|
/*
 * Prepend an IPv4 header for the encapsulated packet.  Consumes m on
 * allocation failure (m_prepend frees it) and returns NULL.
 */
static struct mbuf *
vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m,
    const union vxlan_addr *endpoint, uint8_t tos)
{
	struct ip *ip;

	m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	ip = mtod(m, struct ip *);
	ip->ip_v = IPVERSION;
	ip->ip_hl = sizeof(*ip) >> 2;
	ip->ip_off = sc->sc_df;		/* htons(IP_DF) or 0 */
	ip->ip_tos = tos;
	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_ttl = sc->sc_ttl;
	ip->ip_p = IPPROTO_UDP;
	ip->ip_src = sc->sc_src.in4;
	ip->ip_dst = endpoint->in4;

	return (m);
}
|
|
|
|
#ifdef INET6
|
|
static struct mbuf *
|
|
vxlan_encap_ipv6(struct vxlan_softc *sc, struct mbuf *m,
|
|
const union vxlan_addr *endpoint, uint8_t tos)
|
|
{
|
|
struct ip6_hdr *ip6;
|
|
int len = m->m_pkthdr.len;
|
|
|
|
m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
|
|
if (m == NULL)
|
|
return (NULL);
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
|
|
htonl(m->m_pkthdr.ph_flowid) : 0;
|
|
ip6->ip6_vfc |= IPV6_VERSION;
|
|
ip6->ip6_flow |= htonl((uint32_t)tos << 20);
|
|
ip6->ip6_plen = htons(len);
|
|
ip6->ip6_nxt = IPPROTO_UDP;
|
|
ip6->ip6_hlim = sc->sc_ttl;
|
|
ip6->ip6_src = sc->sc_src.in6;
|
|
ip6->ip6_dst = endpoint->in6;
|
|
|
|
if (sc->sc_df)
|
|
SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
|
|
|
|
return (m);
|
|
}
|
|
#endif /* INET6 */
|
|
|
|
static int
|
|
vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
|
|
struct rtentry *rt)
|
|
{
|
|
struct m_tag *mtag;
|
|
|
|
mtag = NULL;
|
|
while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) {
|
|
if (*(int *)(mtag + 1) == ifp->if_index) {
|
|
m_freem(m);
|
|
return (EIO);
|
|
}
|
|
}
|
|
|
|
return (ether_output(ifp, m, dst, rt));
|
|
}
|
|
|
|
/*
 * if_enqueue handler: queue the frame on if_snd and kick the send
 * task so encapsulation and IP output run on the softnet taskq.
 */
static int
vxlan_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	struct vxlan_softc *sc = ifp->if_softc;
	struct ifqueue *ifq = &ifp->if_snd;

	if (ifq_enqueue(ifq, m) != 0)
		return (ENOBUFS);

	task_add(ifq->ifq_softnet, &sc->sc_send_task);

	return (0);
}
|
|
|
|
/*
 * if_qstart handler: the queue was restarted, schedule the send task
 * to drain it.
 */
static void
vxlan_start(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	struct vxlan_softc *sc = ifp->if_softc;

	task_add(ifq->ifq_softnet, &sc->sc_send_task);
}
|
|
|
|
/*
 * Transmit a list of encapsulated packets via IPv4.  Multicast
 * options steer group traffic out the configured parent interface
 * with the configured ttl and loopback disabled.  Returns the number
 * of packets that failed in ip_output().
 */
static uint64_t
vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	imo.imo_ifidx = sc->sc_if_index0;
	imo.imo_ttl = sc->sc_ttl;
	imo.imo_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
|
|
|
|
#ifdef INET6
|
|
static uint64_t
|
|
vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml)
|
|
{
|
|
struct ip6_moptions im6o;
|
|
struct mbuf *m;
|
|
uint64_t oerrors = 0;
|
|
|
|
im6o.im6o_ifidx = sc->sc_if_index0;
|
|
im6o.im6o_hlim = sc->sc_ttl;
|
|
im6o.im6o_loop = 0;
|
|
|
|
NET_LOCK();
|
|
while ((m = ml_dequeue(ml)) != NULL) {
|
|
if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
|
|
oerrors++;
|
|
}
|
|
NET_UNLOCK();
|
|
|
|
return (oerrors);
|
|
}
|
|
#endif /* INET6 */
|
|
|
|
/*
 * Send task, run on the softnet taskq: drain if_snd, encapsulate each
 * frame for the configured address family, and hand the batch to the
 * matching IP output routine.  Output errors are accumulated into the
 * interface counters.
 */
static void
vxlan_send(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *,
	    const union vxlan_addr *, uint8_t);
	uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *);
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* select the per-af encapsulation and output handlers once */
	switch (sc->sc_af) {
	case AF_INET:
		ip_encap = vxlan_encap_ipv4;
		ip_send = vxlan_send_ipv4;
		break;
#ifdef INET6
	case AF_INET6:
		ip_encap = vxlan_encap_ipv6;
		ip_send = vxlan_send_ipv6;
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
		/* NOTREACHED */
	}

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		caddr_t if_bpf = READ_ONCE(ifp->if_bpf);
		if (if_bpf != NULL)
			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
#endif
		m = vxlan_encap(sc, m, ip_encap);
		if (m == NULL)
			continue;

		ml_enqueue(&ml, m);
	}

	oerrors = (*ip_send)(sc, &ml);

	counters_add(ifp->if_counters, ifc_oerrors, oerrors);
}
|
|
|
|
static struct mbuf *
|
|
vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
|
|
void *uhp, int hlen)
|
|
{
|
|
struct vxlan_tep *vt = arg;
|
|
union vxlan_addr addr;
|
|
struct vxlan_peer key, *p;
|
|
struct udphdr *uh;
|
|
struct vxlan_header *vh;
|
|
struct ether_header *eh;
|
|
int vhlen = hlen + sizeof(*vh);
|
|
struct mbuf *n;
|
|
int off;
|
|
in_port_t port;
|
|
struct vxlan_softc *sc = NULL;
|
|
struct ifnet *ifp;
|
|
int rxhprio;
|
|
uint8_t tos;
|
|
|
|
if (m->m_pkthdr.len < vhlen)
|
|
goto drop;
|
|
|
|
uh = uhp;
|
|
port = uh->uh_sport;
|
|
|
|
if (ip != NULL) {
|
|
memset(&addr, 0, sizeof(addr));
|
|
addr.in4 = ip->ip_src;
|
|
tos = ip->ip_tos;
|
|
}
|
|
#ifdef INET6
|
|
else {
|
|
addr.in6 = ip6->ip6_src;
|
|
tos = bemtoh32(&ip6->ip6_flow) >> 20;
|
|
}
|
|
#endif
|
|
|
|
if (m->m_len < vhlen) {
|
|
m = m_pullup(m, vhlen);
|
|
if (m == NULL)
|
|
return (NULL);
|
|
}
|
|
|
|
/* can't use ip/ip6/uh after this */
|
|
|
|
vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen);
|
|
|
|
memset(&key, 0, sizeof(key));
|
|
key.p_addr = addr;
|
|
key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I);
|
|
key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK);
|
|
|
|
mtx_enter(&vt->vt_mtx);
|
|
p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key);
|
|
if (p == NULL) {
|
|
memset(&key.p_addr, 0, sizeof(key.p_addr));
|
|
p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key);
|
|
}
|
|
if (p != NULL)
|
|
sc = vxlan_take(p->p_sc);
|
|
mtx_leave(&vt->vt_mtx);
|
|
|
|
if (sc == NULL)
|
|
goto drop;
|
|
|
|
ifp = &sc->sc_ac.ac_if;
|
|
if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port)
|
|
goto rele_drop;
|
|
|
|
m_adj(m, vhlen);
|
|
|
|
if (m->m_pkthdr.len < sizeof(*eh))
|
|
goto rele_drop;
|
|
|
|
if (m->m_len < sizeof(*eh)) {
|
|
m = m_pullup(m, sizeof(*eh));
|
|
if (m == NULL)
|
|
goto rele;
|
|
}
|
|
|
|
n = m_getptr(m, sizeof(*eh), &off);
|
|
if (n == NULL)
|
|
goto rele_drop;
|
|
|
|
if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
|
|
n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
|
|
m_freem(m);
|
|
if (n == NULL)
|
|
goto rele;
|
|
m = n;
|
|
}
|
|
|
|
if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
|
|
eh = mtod(m, struct ether_header *);
|
|
etherbridge_map_ea(&sc->sc_eb, &addr,
|
|
(struct ether_addr *)eh->ether_shost);
|
|
}
|
|
|
|
rxhprio = sc->sc_rxhprio;
|
|
switch (rxhprio) {
|
|
case IF_HDRPRIO_PACKET:
|
|
/* nop */
|
|
break;
|
|
case IF_HDRPRIO_OUTER:
|
|
m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
|
|
break;
|
|
default:
|
|
m->m_pkthdr.pf.prio = rxhprio;
|
|
break; \
|
|
} \
|
|
|
|
if_vinput(ifp, m);
|
|
rele:
|
|
vxlan_rele(sc);
|
|
return (NULL);
|
|
|
|
rele_drop:
|
|
vxlan_rele(sc);
|
|
drop:
|
|
m_freem(m);
|
|
return (NULL);
|
|
}
|
|
|
|
/*
 * Interface ioctl dispatch.  Tunnel configuration ioctls are handled
 * by the vxlan_set/get/del helpers; bridge cache ioctls go to the
 * shared etherbridge code; everything else falls through to
 * ether_ioctl().
 */
static int
vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vxlan_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* reconcile IFF_UP with IFF_RUNNING */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = vxlan_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = vxlan_down(sc);
		}
		break;

	case SIOCSLIFPHYRTABLE:
		error = vxlan_set_rdomain(sc, ifr);
		break;
	case SIOCGLIFPHYRTABLE:
		error = vxlan_get_rdomain(sc, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = vxlan_del_tunnel(sc);
		break;

	case SIOCSVNETID:
		error = vxlan_set_vnetid(sc, ifr);
		break;
	case SIOCGVNETID:
		error = vxlan_get_vnetid(sc, ifr);
		break;
	case SIOCDVNETID:
		error = vxlan_del_vnetid(sc);
		break;

	case SIOCSIFPARENT:
		error = vxlan_set_parent(sc, (struct if_parent *)data);
		break;
	case SIOCGIFPARENT:
		error = vxlan_get_parent(sc, (struct if_parent *)data);
		break;
	case SIOCDIFPARENT:
		error = vxlan_del_parent(sc);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_rxhprio;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_ttl;
		break;

	case SIOCBRDGSCACHE:
		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGSTO:
		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb,
		    (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = vxlan_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = vxlan_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* no hardware to program */
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
|
|
|
|
/*
 * Find an existing tunnel endpoint matching the softc's address
 * family, rdomain, and port plus the given local address.  Returns
 * NULL if no matching endpoint exists.  The caller holds vxlan_lock
 * to keep the vxlan_teps list stable.
 */
static struct vxlan_tep *
vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr)
{
	struct vxlan_tep *vt;

	TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) {
		if (sc->sc_af == vt->vt_af &&
		    sc->sc_rdomain == vt->vt_rdomain &&
		    memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 &&
		    sc->sc_port == vt->vt_port)
			return (vt);
	}

	return (NULL);
}
|
|
|
|
/*
 * Register peer p on the tunnel endpoint for addr, creating the
 * endpoint (and its bound UDP socket) if it doesn't exist yet.
 * Returns 0, EADDRINUSE if an equivalent peer is already registered,
 * or a socket/allocation errno.  Caller holds vxlan_lock.
 */
static int
vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
    struct vxlan_peer *p)
{
	struct mbuf m;
	struct vxlan_tep *vt;
	struct socket *so;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif
	int error;

	vt = vxlan_tep_get(sc, addr);
	if (vt != NULL) {
		/* endpoint already exists: just add the peer to it */
		struct vxlan_peer *op;

		mtx_enter(&vt->vt_mtx);
		op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p);
		mtx_leave(&vt->vt_mtx);

		if (op != NULL)
			return (EADDRINUSE);

		return (0);
	}

	vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (vt == NULL)
		return (ENOMEM);

	vt->vt_af = sc->sc_af;
	vt->vt_rdomain = sc->sc_rdomain;
	vt->vt_addr = *addr;
	vt->vt_port = sc->sc_port;

	mtx_init(&vt->vt_mtx, IPL_SOFTNET);
	RBT_INIT(vxlan_peers, &vt->vt_peers);
	/* first peer on a fresh tree: insert cannot collide */
	RBT_INSERT(vxlan_peers, &vt->vt_peers, p);

	error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP);
	if (error != 0)
		goto free;

	/* steer received datagrams into vxlan_input() */
	solock(so);
	sotoinpcb(so)->inp_upcall = vxlan_input;
	sotoinpcb(so)->inp_upcall_arg = vt;
	sounlock(so);

	/* put the socket in the tunnel's routing table */
	m_inithdr(&m);
	m.m_len = sizeof(vt->vt_rdomain);
	*mtod(&m, unsigned int *) = vt->vt_rdomain;
	error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m);
	if (error != 0)
		goto close;

	/* bind to the local tunnel address and port */
	m_inithdr(&m);
	switch (vt->vt_af) {
	case AF_INET:
		sin = mtod(&m, struct sockaddr_in *);
		memset(sin, 0, sizeof(*sin));
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = addr->in4;
		sin->sin_port = vt->vt_port;

		m.m_len = sizeof(*sin);
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = mtod(&m, struct sockaddr_in6 *);
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &addr->in6);
		sin6->sin6_port = sc->sc_port;	/* same as vt_port */

		m.m_len = sizeof(*sin6);
		break;
#endif
	default:
		unhandled_af(vt->vt_af);
	}

	solock(so);
	error = sobind(so, &m, curproc);
	sounlock(so);
	if (error != 0)
		goto close;

	rw_assert_wrlock(&vxlan_lock);
	TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry);

	vt->vt_so = so;

	return (0);

close:
	soclose(so, MSG_DONTWAIT);
free:
	free(vt, M_DEVBUF, sizeof(*vt));
	return (error);
}
|
|
|
|
/*
 * Remove peer p from the tunnel endpoint for addr; if that was the
 * last peer, tear the endpoint and its socket down.  Caller holds
 * vxlan_lock.  Panics if the endpoint doesn't exist, since a
 * registered peer always has one.
 */
static void
vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
    struct vxlan_peer *p)
{
	struct vxlan_tep *vt;
	int empty;

	vt = vxlan_tep_get(sc, addr);
	if (vt == NULL)
		panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc);

	mtx_enter(&vt->vt_mtx);
	RBT_REMOVE(vxlan_peers, &vt->vt_peers, p);
	empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers);
	mtx_leave(&vt->vt_mtx);

	if (!empty)
		return;

	rw_assert_wrlock(&vxlan_lock);
	TAILQ_REMOVE(&vxlan_teps, vt, vt_entry);

	soclose(vt->vt_so, MSG_DONTWAIT);
	free(vt, M_DEVBUF, sizeof(*vt));
}
|
|
|
|
static int
|
|
vxlan_tep_up(struct vxlan_softc *sc)
|
|
{
|
|
struct vxlan_peer *up, *mp;
|
|
int error;
|
|
|
|
up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO);
|
|
if (up == NULL)
|
|
return (ENOMEM);
|
|
|
|
if (sc->sc_mode == VXLAN_TMODE_P2P)
|
|
up->p_addr = sc->sc_dst;
|
|
up->p_header = sc->sc_header;
|
|
up->p_sc = vxlan_take(sc);
|
|
|
|
error = vxlan_tep_add_addr(sc, &sc->sc_src, up);
|
|
if (error != 0)
|
|
goto freeup;
|
|
|
|
sc->sc_ucast_peer = up;
|
|
|
|
if (sc->sc_mode != VXLAN_TMODE_LEARNING)
|
|
return (0);
|
|
|
|
mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO);
|
|
if (mp == NULL) {
|
|
error = ENOMEM;
|
|
goto delup;
|
|
}
|
|
|
|
/* addr is multicast, leave it as 0s */
|
|
mp->p_header = sc->sc_header;
|
|
mp->p_sc = vxlan_take(sc);
|
|
|
|
/* destination address is a multicast group we want to join */
|
|
error = vxlan_tep_add_addr(sc, &sc->sc_dst, up);
|
|
if (error != 0)
|
|
goto freemp;
|
|
|
|
sc->sc_mcast_peer = mp;
|
|
|
|
return (0);
|
|
|
|
freemp:
|
|
vxlan_rele(mp->p_sc);
|
|
free(mp, M_DEVBUF, sizeof(*mp));
|
|
delup:
|
|
vxlan_tep_del_addr(sc, &sc->sc_src, up);
|
|
freeup:
|
|
vxlan_rele(up->p_sc);
|
|
free(up, M_DEVBUF, sizeof(*up));
|
|
return (error);
|
|
}
|
|
|
|
/*
 * Undo vxlan_tep_up(): deregister and free the multicast peer (in
 * LEARNING mode) and the unicast peer.  Caller holds vxlan_lock.
 */
static void
vxlan_tep_down(struct vxlan_softc *sc)
{
	struct vxlan_peer *up = sc->sc_ucast_peer;

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		struct vxlan_peer *mp = sc->sc_mcast_peer;
		vxlan_tep_del_addr(sc, &sc->sc_dst, mp);
		vxlan_rele(mp->p_sc);
		free(mp, M_DEVBUF, sizeof(*mp));
	}

	vxlan_tep_del_addr(sc, &sc->sc_src, up);
	vxlan_rele(up->p_sc);
	free(up, M_DEVBUF, sizeof(*up));
}
|
|
|
|
/*
 * Bring the interface up.  Called with NET_LOCK held; the lock is
 * dropped to take vxlan_lock (which sleeps) and retaken to re-check
 * IFF_RUNNING and finally to set it, keeping the lock order
 * vxlan_lock -> NET_LOCK.  Returns 0 or an errno, always with
 * NET_LOCK held again on return.
 */
static int
vxlan_up(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0 = NULL;
	int error;

	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
	NET_ASSERT_LOCKED();

	if (sc->sc_af == AF_UNSPEC)
		return (EDESTADDRREQ);
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	NET_UNLOCK();

	error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
	if (error != 0)
		goto netlock;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		/* something else beat us */
		rw_exit(&vxlan_lock);
		return (0);
	}
	NET_UNLOCK();

	if (sc->sc_mode != VXLAN_TMODE_P2P) {
		error = etherbridge_up(&sc->sc_eb);
		if (error != 0)
			goto unlock;
	}

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		ifp0 = if_get(sc->sc_if_index0);
		if (ifp0 == NULL) {
			error = ENXIO;
			goto down;
		}

		/* check again if multicast will work on top of the parent */
		if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
			error = EPROTONOSUPPORT;
			goto put;
		}

		error = vxlan_addmulti(sc, ifp0);
		if (error != 0)
			goto put;

		/* Register callback if parent wants to unregister */
		if_detachhook_add(ifp0, &sc->sc_dtask);
	} else {
		/* a parent only makes sense in LEARNING mode */
		if (sc->sc_if_index0 != 0) {
			error = EPROTONOSUPPORT;
			goto down;
		}
	}

	error = vxlan_tep_up(sc);
	if (error != 0)
		goto del;

	if_put(ifp0);

	NET_LOCK();
	SET(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);

del:
	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		if (ifp0 != NULL)
			if_detachhook_del(ifp0, &sc->sc_dtask);
		vxlan_delmulti(sc);
	}
put:
	if_put(ifp0);
down:
	if (sc->sc_mode != VXLAN_TMODE_P2P)
		etherbridge_down(&sc->sc_eb);
unlock:
	rw_exit(&vxlan_lock);
netlock:
	NET_LOCK();

	return (error);
}
|
|
|
|
/*
 * Bring the interface down, undoing vxlan_up() in reverse.  Called
 * with NET_LOCK held; the same lock dance as vxlan_up() is used to
 * take vxlan_lock.  The send task is drained with a barrier before
 * IFF_RUNNING is cleared.  Returns 0 or an errno, with NET_LOCK held
 * on return.
 */
static int
vxlan_down(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error;

	KASSERT(ISSET(ifp->if_flags, IFF_RUNNING));
	NET_UNLOCK();

	error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
	if (error != 0) {
		NET_LOCK();
		return (error);
	}

	NET_LOCK();
	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		/* something else beat us */
		rw_exit(&vxlan_lock);
		return (0);
	}
	NET_UNLOCK();

	vxlan_tep_down(sc);

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		vxlan_delmulti(sc);
		ifp0 = if_get(sc->sc_if_index0);
		if (ifp0 != NULL) {
			if_detachhook_del(ifp0, &sc->sc_dtask);
		}
		if_put(ifp0);
	}

	if (sc->sc_mode != VXLAN_TMODE_P2P)
		etherbridge_down(&sc->sc_eb);

	/* wait for any running send task to finish */
	taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task);
	NET_LOCK();
	CLR(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);
}
|
|
|
|
/*
 * Join the tunnel destination multicast group on the parent
 * interface ifp0, recording the membership in sc_inmulti.  Returns 0
 * or an errno.
 */
static int
vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0)
{
	int error = 0;

	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0);
		if (sc->sc_inmulti == NULL)
			error = EADDRNOTAVAIL;
		break;
#ifdef INET6
	case AF_INET6:
		/* in6_addmulti reports failure via error directly */
		sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	NET_UNLOCK();

	return (error);
}
|
|
|
|
/*
 * Leave the multicast group joined by vxlan_addmulti() and clear the
 * stored membership.
 */
static void
vxlan_delmulti(struct vxlan_softc *sc)
{
	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		in_delmulti(sc->sc_inmulti);
		break;
#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inmulti);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	sc->sc_inmulti = NULL; /* keep it tidy */

	NET_UNLOCK();
}
|
|
|
|
/*
 * SIOCSLIFPHYRTABLE: set the routing domain the tunnel runs in.
 * Rejected while the interface is running; changing the rdomain
 * invalidates learned endpoints, so the bridge cache is flushed.
 */
static int
vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ifr->ifr_rdomainid < 0 ||
	    ifr->ifr_rdomainid > RT_TABLEID_MAX)
		return (EINVAL);
	if (!rtable_exists(ifr->ifr_rdomainid))
		return (EADDRNOTAVAIL);

	if (sc->sc_rdomain == ifr->ifr_rdomainid)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_rdomain = ifr->ifr_rdomainid;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}
|
|
|
|
/*
 * SIOCGLIFPHYRTABLE: report the tunnel's routing domain.
 */
static int
vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr)
{
	ifr->ifr_rdomainid = sc->sc_rdomain;

	return (0);
}
|
|
|
|
/*
 * Validate and install new tunnel addresses from an if_laddrreq.
 * The source address selects the address family and may carry a
 * non-default UDP port.  The destination address is optional and
 * selects the tunnel mode: absent gives an endpoint, a multicast
 * destination gives learning mode, a unicast one point-to-point.
 * Changing a configured tunnel requires the interface to be down.
 */
static int
vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif
	union vxlan_addr saddr, daddr;
	unsigned int mode = VXLAN_TMODE_ENDPOINT;
	in_port_t port = htons(VXLAN_PORT);

	/* zero fill so unused bytes compare equal in the memcmp below */
	memset(&saddr, 0, sizeof(saddr));
	memset(&daddr, 0, sizeof(daddr));

	/* validate */
	switch (src->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* port 0 means keep the default VXLAN port */
		if (src4->sin_port != htons(0))
			port = src4->sin_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* both addresses must share a family */
			if (dst->sa_family != AF_INET)
				return (EINVAL);

			dst4 = (struct sockaddr_in *)dst;
			if (in_nullhost(dst4->sin_addr))
				return (EINVAL);

			/* only the source may set the port */
			if (dst4->sin_port != htons(0))
				return (EINVAL);

			/* all good */
			mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			daddr.in4 = dst4->sin_addr;
		}

		saddr.in4 = src4->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		/* port 0 means keep the default VXLAN port */
		if (src6->sin6_port != htons(0))
			port = src6->sin6_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* both addresses must share a family */
			if (dst->sa_family != AF_INET6)
				return (EINVAL);

			dst6 = (struct sockaddr_in6 *)dst;
			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
				return (EINVAL);

			/* both ends must be in the same scope */
			if (src6->sin6_scope_id != dst6->sin6_scope_id)
				return (EINVAL);

			/* only the source may set the port */
			if (dst6->sin6_port != htons(0))
				return (EINVAL);

			/* all good */
			mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			error = in6_embedscope(&daddr.in6, dst6, NULL, NULL);
			if (error != 0)
				return (error);
		}

		error = in6_embedscope(&saddr.in6, src6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* unchanged configuration: succeed without flushing */
	if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
	    memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
	    sc->sc_port == port)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = src->sa_family;
	sc->sc_src = saddr;
	sc->sc_dst = daddr;
	sc->sc_port = port;
	sc->sc_mode = mode;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}
|
|
|
|
/*
 * Report the configured tunnel addresses, port, and (implicitly, via
 * the presence of a destination address) the tunnel mode.
 */
static int
vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
{
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	if (sc->sc_af == AF_UNSPEC)
		return (EADDRNOTAVAIL);
	/* a configured address family implies a configured mode */
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	memset(&req->addr, 0, sizeof(req->addr));
	memset(&req->dstaddr, 0, sizeof(req->dstaddr));

	/* default to endpoint: a minimal AF_UNSPEC destination sockaddr */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	switch (sc->sc_af) {
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_src.in4;
		sin->sin_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		/* learning/p2p modes also carry a destination */
		sin = (struct sockaddr_in *)&req->dstaddr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_dst.in4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* turn the embedded scope back into sin6_scope_id */
		in6_recoverscope(sin6, &sc->sc_src.in6);
		sin6->sin6_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		/* learning/p2p modes also carry a destination */
		sin6 = (struct sockaddr_in6 *)&req->dstaddr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_dst.in6);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	return (0);
}
|
|
|
|
static int
|
|
vxlan_del_tunnel(struct vxlan_softc *sc)
|
|
{
|
|
struct ifnet *ifp = &sc->sc_ac.ac_if;
|
|
|
|
if (sc->sc_af == AF_UNSPEC)
|
|
return (0);
|
|
|
|
if (ISSET(ifp->if_flags, IFF_RUNNING))
|
|
return (EBUSY);
|
|
|
|
/* commit */
|
|
sc->sc_af = AF_UNSPEC;
|
|
memset(&sc->sc_src, 0, sizeof(sc->sc_src));
|
|
memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
|
|
sc->sc_port = htons(0);
|
|
sc->sc_mode = VXLAN_TMODE_UNSET;
|
|
etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
|
|
{
|
|
struct ifnet *ifp = &sc->sc_ac.ac_if;
|
|
uint32_t vni;
|
|
|
|
if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
|
|
ifr->ifr_vnetid > VXLAN_VNI_MAX)
|
|
return (EINVAL);
|
|
|
|
vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
|
|
if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
|
|
sc->sc_header.vxlan_id == vni)
|
|
return (0);
|
|
|
|
if (ISSET(ifp->if_flags, IFF_RUNNING))
|
|
return (EBUSY);
|
|
|
|
/* commit */
|
|
SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
|
|
sc->sc_header.vxlan_id = vni;
|
|
etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
|
|
{
|
|
uint32_t vni;
|
|
|
|
if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
|
|
return (EADDRNOTAVAIL);
|
|
|
|
vni = ntohl(sc->sc_header.vxlan_id);
|
|
vni &= VXLAN_VNI_MASK;
|
|
vni >>= VXLAN_VNI_SHIFT;
|
|
|
|
ifr->ifr_vnetid = vni;
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
vxlan_del_vnetid(struct vxlan_softc *sc)
|
|
{
|
|
struct ifnet *ifp = &sc->sc_ac.ac_if;
|
|
|
|
if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
|
|
return (0);
|
|
|
|
if (ISSET(ifp->if_flags, IFF_RUNNING))
|
|
return (EBUSY);
|
|
|
|
/* commit */
|
|
CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
|
|
sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT);
|
|
etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p)
|
|
{
|
|
struct ifnet *ifp = &sc->sc_ac.ac_if;
|
|
struct ifnet *ifp0;
|
|
int error = 0;
|
|
|
|
ifp0 = if_unit(p->ifp_parent);
|
|
if (ifp0 == NULL)
|
|
return (ENXIO);
|
|
|
|
if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
|
|
error = ENXIO;
|
|
goto put;
|
|
}
|
|
|
|
if (sc->sc_if_index0 == ifp0->if_index)
|
|
goto put;
|
|
|
|
if (ISSET(ifp->if_flags, IFF_RUNNING)) {
|
|
error = EBUSY;
|
|
goto put;
|
|
}
|
|
|
|
ifsetlro(ifp0, 0);
|
|
|
|
/* commit */
|
|
sc->sc_if_index0 = ifp0->if_index;
|
|
etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
|
|
|
|
put:
|
|
if_put(ifp0);
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p)
|
|
{
|
|
struct ifnet *ifp0;
|
|
int error = 0;
|
|
|
|
ifp0 = if_get(sc->sc_if_index0);
|
|
if (ifp0 == NULL)
|
|
error = EADDRNOTAVAIL;
|
|
else
|
|
strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
|
|
if_put(ifp0);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
vxlan_del_parent(struct vxlan_softc *sc)
|
|
{
|
|
struct ifnet *ifp = &sc->sc_ac.ac_if;
|
|
|
|
if (sc->sc_if_index0 == 0)
|
|
return (0);
|
|
|
|
if (ISSET(ifp->if_flags, IFF_RUNNING))
|
|
return (EBUSY);
|
|
|
|
/* commit */
|
|
sc->sc_if_index0 = 0;
|
|
etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
 * Add a static or dynamic bridge table entry mapping the Ethernet
 * address in the ifbareq to a tunnel endpoint address.  Only valid in
 * endpoint and learning modes, and the endpoint must match the
 * configured tunnel address family.
 */
static int
vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union vxlan_addr endpoint;
	unsigned int type;

	switch (sc->sc_mode) {
	case VXLAN_TMODE_UNSET:
		/* no tunnel configured yet */
		return (ENOPROTOOPT);
	case VXLAN_TMODE_P2P:
		/* p2p has a fixed peer; no table entries */
		return (EPROTONOSUPPORT);
	default:
		break;
	}

	/* ignore ifba_ifsname */

	/* reject any flag bits other than the entry type */
	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	memset(&endpoint, 0, sizeof(endpoint));

	if (ifba->ifba_dstsa.ss_family != sc->sc_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		/* endpoints use the tunnel's port; none may be given */
		if (sin->sin_port != htons(0))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		/* the endpoint must be in the tunnel source's scope */
		in6_recoverscope(&src6, &sc->sc_src.in6);
		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		/* endpoints use the tunnel's port; none may be given */
		if (sin6->sin6_port != htons(0))
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}
|
|
|
|
/*
 * Remove the bridge table entry for the Ethernet address in the
 * ifbareq.
 */
static int
vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}
|
|
|
|
/*
 * Detach hook run when the parent interface goes away: bring the
 * tunnel down first, then forget the parent's interface index.
 */
void
vxlan_detach_hook(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		vxlan_down(sc);
		CLR(ifp->if_flags, IFF_UP);
	}

	/* must happen after vxlan_down(), which may still use the index */
	sc->sc_if_index0 = 0;
}
|
|
|
|
static int
|
|
vxlan_eb_port_eq(void *arg, void *a, void *b)
|
|
{
|
|
const union vxlan_addr *va = a, *vb = b;
|
|
size_t i;
|
|
|
|
for (i = 0; i < nitems(va->in6.s6_addr32); i++) {
|
|
if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i])
|
|
return (0);
|
|
}
|
|
|
|
return (1);
|
|
}
|
|
|
|
static void *
|
|
vxlan_eb_port_take(void *arg, void *port)
|
|
{
|
|
union vxlan_addr *endpoint;
|
|
|
|
endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT);
|
|
if (endpoint == NULL)
|
|
return (NULL);
|
|
|
|
*endpoint = *(union vxlan_addr *)port;
|
|
|
|
return (endpoint);
|
|
}
|
|
|
|
/*
 * etherbridge callback: release an endpoint address taken by
 * vxlan_eb_port_take() back to the pool.
 */
static void
vxlan_eb_port_rele(void *arg, void *port)
{
	union vxlan_addr *endpoint = port;

	pool_put(&vxlan_endpoint_pool, endpoint);
}
|
|
|
|
/*
 * etherbridge callback: the "interface name" for an endpoint is the
 * vxlan interface itself.  Returns strlcpy() semantics.
 */
static size_t
vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
{
	struct vxlan_softc *sc = arg;

	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
}
|
|
|
|
/*
 * etherbridge callback: convert an endpoint address into a sockaddr
 * in the tunnel's configured address family.
 */
static void
vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct vxlan_softc *sc = arg;
	union vxlan_addr *endpoint = port;

	switch (sc->sc_af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = endpoint->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* turn the embedded scope back into sin6_scope_id */
		in6_recoverscope(sin6, &endpoint->in6);
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_af);
	}
}
|
|
|
|
static inline int
|
|
vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp)
|
|
{
|
|
size_t i;
|
|
|
|
if (ap->p_header.vxlan_id > bp->p_header.vxlan_id)
|
|
return (1);
|
|
if (ap->p_header.vxlan_id < bp->p_header.vxlan_id)
|
|
return (-1);
|
|
if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags)
|
|
return (1);
|
|
if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags)
|
|
return (-1);
|
|
|
|
for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) {
|
|
if (ap->p_addr.in6.s6_addr32[i] >
|
|
bp->p_addr.in6.s6_addr32[i])
|
|
return (1);
|
|
if (ap->p_addr.in6.s6_addr32[i] <
|
|
bp->p_addr.in6.s6_addr32[i])
|
|
return (-1);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
/* generate the red-black tree routines for the peer lookup tree */
RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);