diff --git a/regress/usr.sbin/bgpd/integrationtests/Makefile b/regress/usr.sbin/bgpd/integrationtests/Makefile index d683910c6..fcb5fb231 100644 --- a/regress/usr.sbin/bgpd/integrationtests/Makefile +++ b/regress/usr.sbin/bgpd/integrationtests/Makefile @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile,v 1.26 2025/01/13 14:18:07 claudio Exp $ +# $OpenBSD: Makefile,v 1.27 2025/01/14 13:15:18 claudio Exp $ REGRESS_TARGETS = network_statement md5 ovs capa policy pftable \ mrt maxprefix maxprefixout maxcomm maxattr \ @@ -51,6 +51,9 @@ l3vpn: ixp: ${SUDO} ksh ${.CURDIR}/$@.sh ${BGPD} ${.CURDIR} 11 12 pair11 pair12 +addpath: + ${SUDO} ksh ${.CURDIR}/$@.sh ${BGPD} ${.CURDIR} 11 12 pair11 pair12 + lladdr: ${SUDO} ksh ${.CURDIR}/$@.sh ${BGPD} ${.CURDIR} 11 12 pair11 pair12 diff --git a/regress/usr.sbin/bgpd/integrationtests/addpath.rdomain2.ok b/regress/usr.sbin/bgpd/integrationtests/addpath.rdomain2.ok new file mode 100644 index 000000000..66ecb94ea --- /dev/null +++ b/regress/usr.sbin/bgpd/integrationtests/addpath.rdomain2.ok @@ -0,0 +1,31 @@ +flags: * = Valid, > = Selected, I = via IBGP, A = Announced, + S = Stale, E = Error, F = Filtered +origin validation state: N = not-found, V = valid, ! = invalid +aspa validation state: ? = unknown, V = valid, ! = invalid +origin: i = IGP, e = EGP, ? = Incomplete + +flags vs destination gateway lpref med aspath origin +*> N-? 2.0.1.0/24 192.0.2.21 100 2 2 i +*> N-? 2.0.2.0/24 192.0.2.21 100 2 2 i +* N-? 2.0.2.0/24 192.0.2.31 100 3 3 i +*> N-? 2.0.3.0/24 192.0.2.21 100 2 2 i +* N-? 2.0.3.0/24 192.0.2.31 100 3 3 i +* N-? 2.0.3.0/24 192.0.2.41 100 4 4 i +*> N-? 2.0.4.0/24 192.0.2.21 100 2 2 i +* N-? 2.0.4.0/24 192.0.2.31 100 3 3 i +* N-? 2.0.4.0/24 192.0.2.41 100 4 4 i +*> N-? 2.0.5.0/24 192.0.2.21 100 2 2 i +* N-? 2.0.5.0/24 192.0.2.31 100 3 3 i +* N-? 2.0.5.0/24 192.0.2.41 100 4 4 i +*> N-? 2.0.6.0/24 192.0.2.31 100 3 3 i +* N-? 2.0.6.0/24 192.0.2.41 100 4 4 i +* N-? 2.0.6.0/24 192.0.2.51 100 5 5 i +*> N-? 2.0.7.0/24 192.0.2.21 100 2 2 i +* N-? 
2.0.7.0/24 192.0.2.41 100 4 4 i +* N-? 2.0.7.0/24 192.0.2.51 100 5 5 i +*> N-? 2.0.8.0/24 192.0.2.21 100 2 2 i +* N-? 2.0.8.0/24 192.0.2.31 100 3 3 i +* N-? 2.0.8.0/24 192.0.2.51 100 5 5 i +*> N-? 2.0.9.0/24 192.0.2.21 100 2 2 i +* N-? 2.0.9.0/24 192.0.2.31 100 3 3 i +* N-? 2.0.9.0/24 192.0.2.41 100 4 4 i diff --git a/regress/usr.sbin/bgpd/integrationtests/addpath.sh b/regress/usr.sbin/bgpd/integrationtests/addpath.sh new file mode 100644 index 000000000..2101c669d --- /dev/null +++ b/regress/usr.sbin/bgpd/integrationtests/addpath.sh @@ -0,0 +1,100 @@ +#!/bin/ksh +# $OpenBSD: addpath.sh,v 1.1 2025/01/14 13:15:18 claudio Exp $ + +set -e + +BGPD=$1 +BGPDCONFIGDIR=$2 +RDOMAIN1=$3 +RDOMAIN2=$4 +PAIR1=$5 +PAIR2=$6 + +RDOMAINS="${RDOMAIN1} ${RDOMAIN2}" +PAIRS="${PAIR1} ${PAIR2}" +PAIR1IP=192.0.2.2 +PAIR2IP=192.0.2.11 +PAIR2IP2=192.0.2.21 +PAIR2IP3=192.0.2.31 +PAIR2IP4=192.0.2.41 +PAIR2IP5=192.0.2.51 + +error_notify() { + echo cleanup + pkill -T ${RDOMAIN1} bgpd || true + pkill -T ${RDOMAIN2} bgpd || true + sleep 1 + ifconfig ${PAIR2} destroy || true + ifconfig ${PAIR1} destroy || true + route -qn -T ${RDOMAIN1} flush || true + route -qn -T ${RDOMAIN2} flush || true + ifconfig lo${RDOMAIN1} destroy || true + ifconfig lo${RDOMAIN2} destroy || true + if [ $1 -ne 0 ]; then + echo FAILED + exit 1 + else + echo SUCCESS + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo need root privileges >&2 + exit 1 +fi + +. "${BGPDCONFIGDIR}/util.sh" + +trap 'error_notify $?' 
EXIT + +echo check if rdomains are busy +for n in ${RDOMAINS}; do + if /sbin/ifconfig | grep -v "^lo${n}:" | grep " rdomain ${n} "; then + echo routing domain ${n} is already used >&2 + exit 1 + fi +done + +echo check if interfaces are busy +for n in ${PAIRS}; do + /sbin/ifconfig "${n}" >/dev/null 2>&1 && \ + ( echo interface ${n} is already used >&2; exit 1 ) +done + +set -x + +echo setup +ifconfig ${PAIR1} rdomain ${RDOMAIN1} ${PAIR1IP}/24 up +ifconfig ${PAIR2} rdomain ${RDOMAIN2} ${PAIR2IP}/24 up +ifconfig ${PAIR2} alias ${PAIR2IP2}/32 +ifconfig ${PAIR2} alias ${PAIR2IP3}/32 +ifconfig ${PAIR2} alias ${PAIR2IP4}/32 +ifconfig ${PAIR2} alias ${PAIR2IP5}/32 +ifconfig ${PAIR1} patch ${PAIR2} +ifconfig lo${RDOMAIN1} inet 127.0.0.1/8 +ifconfig lo${RDOMAIN2} inet 127.0.0.1/8 + +echo run bgpds +route -T ${RDOMAIN1} exec ${BGPD} \ + -v -f ${BGPDCONFIGDIR}/bgpd.addpath.rdomain1.conf +sleep 2 +route -T ${RDOMAIN2} exec ${BGPD} \ + -v -f ${BGPDCONFIGDIR}/bgpd.addpath.rdomain2_1.conf +route -T ${RDOMAIN2} exec ${BGPD} \ + -v -f ${BGPDCONFIGDIR}/bgpd.addpath.rdomain2_2.conf +route -T ${RDOMAIN2} exec ${BGPD} \ + -v -f ${BGPDCONFIGDIR}/bgpd.addpath.rdomain2_3.conf +route -T ${RDOMAIN2} exec ${BGPD} \ + -v -f ${BGPDCONFIGDIR}/bgpd.addpath.rdomain2_4.conf +route -T ${RDOMAIN2} exec ${BGPD} \ + -v -f ${BGPDCONFIGDIR}/bgpd.addpath.rdomain2_5.conf + +sleep 7 + +route -T ${RDOMAIN1} exec bgpctl show +route -T ${RDOMAIN2} exec bgpctl show rib | tee addpath.rdomain2.out + +diff -u ${BGPDCONFIGDIR}/addpath.rdomain2.ok addpath.rdomain2.out +echo OK + +exit 0 diff --git a/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain1.conf b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain1.conf new file mode 100644 index 000000000..6efdf6ac0 --- /dev/null +++ b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain1.conf @@ -0,0 +1,51 @@ +# built by ARouteServer +AS 999 +router-id 192.0.2.2 + +fib-update no +log updates + +nexthop qualify via default + +rde evaluate all + +# 
--------------------------------------------------------- +# MEMBERS + +group "clients" { + transparent-as yes + + neighbor 192.0.2.11 { + remote-as 1 + descr "AS1_1 client" + + announce add-path send best plus 2 + } + + neighbor 192.0.2.21 { + remote-as 2 + descr "AS2_1 client" + set med +2 + } + + neighbor 192.0.2.31 { + remote-as 3 + descr "AS3_1 client" + set med +3 + } + + neighbor 192.0.2.41 { + remote-as 4 + descr "AS4_1 client" + set med +4 + } + + neighbor 192.0.2.51 { + remote-as 5 + descr "AS5_1 client" + set med +5 + } +} + +allow from any +allow to any diff --git a/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_1.conf b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_1.conf new file mode 100644 index 000000000..b08f5032d --- /dev/null +++ b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_1.conf @@ -0,0 +1,17 @@ +AS 1 +router-id 192.0.2.11 +listen on 192.0.2.11 +fib-update no + +rde med compare always + +neighbor 192.0.2.2 { + remote-as 999 + local-address 192.0.2.11 + enforce neighbor-as no + + announce add-path recv yes +} + +allow from any +allow to any diff --git a/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_2.conf b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_2.conf new file mode 100644 index 000000000..2684c630b --- /dev/null +++ b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_2.conf @@ -0,0 +1,24 @@ +AS 2 +router-id 192.0.2.21 +listen on 192.0.2.21 +fib-update no +socket "/var/run/bgpd.sock.12_2" + +network 2.0.1.0/24 +network 2.0.2.0/24 +network 2.0.3.0/24 +network 2.0.4.0/24 +network 2.0.5.0/24 +network 2.0.6.0/24 set prepend-self 2 +network 2.0.7.0/24 +network 2.0.8.0/24 +network 2.0.9.0/24 + +neighbor 192.0.2.2 { + remote-as 999 + local-address 192.0.2.21 + enforce neighbor-as no +} + +deny from any +allow to any diff --git a/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_3.conf 
b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_3.conf new file mode 100644 index 000000000..166d89d63 --- /dev/null +++ b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_3.conf @@ -0,0 +1,23 @@ +AS 3 +router-id 192.0.2.31 +listen on 192.0.2.31 +fib-update no +socket "/var/run/bgpd.sock.12_3" + +network 2.0.2.0/24 +network 2.0.3.0/24 +network 2.0.4.0/24 +network 2.0.5.0/24 +network 2.0.6.0/24 +network 2.0.7.0/24 set prepend-self 2 +network 2.0.8.0/24 +network 2.0.9.0/24 + +neighbor 192.0.2.2 { + remote-as 999 + local-address 192.0.2.31 + enforce neighbor-as no +} + +deny from any +allow to any diff --git a/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_4.conf b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_4.conf new file mode 100644 index 000000000..7fe080be1 --- /dev/null +++ b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_4.conf @@ -0,0 +1,22 @@ +AS 4 +router-id 192.0.2.41 +listen on 192.0.2.41 +fib-update no +socket "/var/run/bgpd.sock.12_4" + +network 2.0.3.0/24 +network 2.0.4.0/24 +network 2.0.5.0/24 +network 2.0.6.0/24 +network 2.0.7.0/24 +network 2.0.8.0/24 set prepend-self 2 +network 2.0.9.0/24 + +neighbor 192.0.2.2 { + remote-as 999 + local-address 192.0.2.41 + enforce neighbor-as no +} + +deny from any +allow to any diff --git a/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_5.conf b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_5.conf new file mode 100644 index 000000000..f3e361a43 --- /dev/null +++ b/regress/usr.sbin/bgpd/integrationtests/bgpd.addpath.rdomain2_5.conf @@ -0,0 +1,21 @@ +AS 5 +router-id 192.0.2.51 +listen on 192.0.2.51 +fib-update no +socket "/var/run/bgpd.sock.12_5" + +network 2.0.4.0/24 +network 2.0.5.0/24 +network 2.0.6.0/24 +network 2.0.7.0/24 +network 2.0.8.0/24 +network 2.0.9.0/24 set prepend-self 2 + +neighbor 192.0.2.2 { + remote-as 999 + local-address 192.0.2.51 + enforce neighbor-as no +} + +deny from any +allow to any diff --git 
a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c index da7f2c3be..db5f5381d 100644 --- a/sys/dev/fdt/virtio_mmio.c +++ b/sys/dev/fdt/virtio_mmio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: virtio_mmio.c,v 1.21 2024/12/20 22:18:27 sf Exp $ */ +/* $OpenBSD: virtio_mmio.c,v 1.23 2025/01/14 14:28:38 sf Exp $ */ /* $NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $ */ /* @@ -105,6 +105,8 @@ int virtio_mmio_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *); int virtio_mmio_intr(void *); void virtio_mmio_intr_barrier(struct virtio_softc *); +int virtio_mmio_intr_establish(struct virtio_softc *, struct virtio_attach_args *, + int, struct cpu_info *, int (*)(void *), void *); struct virtio_mmio_softc { struct virtio_softc sc_sc; @@ -122,7 +124,7 @@ struct virtio_mmio_softc { struct virtio_mmio_attach_args { struct virtio_attach_args vma_va; - struct fdt_attach_args *vma_fa; + struct fdt_attach_args *vma_fa; }; const struct cfattach virtio_mmio_ca = { @@ -160,6 +162,7 @@ const struct virtio_ops virtio_mmio_ops = { virtio_mmio_attach_finish, virtio_mmio_intr, virtio_mmio_intr_barrier, + virtio_mmio_intr_establish, }; uint16_t @@ -234,7 +237,7 @@ virtio_mmio_set_status(struct virtio_softc *vsc, int status) VIRTIO_MMIO_STATUS) != 0) { CPU_BUSY_CYCLE(); } - } else { + } else { old = bus_space_read_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_STATUS); bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_STATUS, @@ -546,3 +549,11 @@ virtio_mmio_intr_barrier(struct virtio_softc *vsc) if (sc->sc_ih) intr_barrier(sc->sc_ih); } + +int +virtio_mmio_intr_establish(struct virtio_softc *vsc, + struct virtio_attach_args *va, int vec, struct cpu_info *ci, + int (*func)(void *), void *arg) +{ + return ENXIO; +} diff --git a/sys/dev/pci/drm/i915/gt/intel_context.c b/sys/dev/pci/drm/i915/gt/intel_context.c index 71f8ea369..f6ad6c57d 100644 --- a/sys/dev/pci/drm/i915/gt/intel_context.c +++ b/sys/dev/pci/drm/i915/gt/intel_context.c @@ -404,7 +404,7 @@ 
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) rw_init(&ce->pin_mutex, "cepin"); - mtx_init(&ce->guc_state.lock, IPL_NONE); + mtx_init(&ce->guc_state.lock, IPL_TTY); INIT_LIST_HEAD(&ce->guc_state.fences); INIT_LIST_HEAD(&ce->guc_state.requests); diff --git a/sys/dev/pci/drm/i915/gt/intel_engine_user.c b/sys/dev/pci/drm/i915/gt/intel_engine_user.c index 10024c313..df26622f8 100644 --- a/sys/dev/pci/drm/i915/gt/intel_engine_user.c +++ b/sys/dev/pci/drm/i915/gt/intel_engine_user.c @@ -165,7 +165,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915) disabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); - if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) + if (intel_uc_uses_guc_submission(&engine->gt->uc)) enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP; for (i = 0; i < ARRAY_SIZE(map); i++) { diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c index 8463f6223..2c59e899c 100644 --- a/sys/dev/pci/virtio_pci.c +++ b/sys/dev/pci/virtio_pci.c @@ -1,4 +1,4 @@ -/* $OpenBSD: virtio_pci.c,v 1.48 2024/12/20 22:18:27 sf Exp $ */ +/* $OpenBSD: virtio_pci.c,v 1.50 2025/01/14 14:28:38 sf Exp $ */ /* $NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $ */ /* @@ -50,7 +50,7 @@ * XXX: PCI-endian while the device specific registers are native endian. 
*/ -#define MAX_MSIX_VECS 8 +#define MAX_MSIX_VECS 16 struct virtio_pci_softc; struct virtio_pci_attach_args; @@ -62,7 +62,7 @@ int virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *p int virtio_pci_detach(struct device *, int); void virtio_pci_kick(struct virtio_softc *, uint16_t); -int virtio_pci_adjust_config_region(struct virtio_pci_softc *); +int virtio_pci_adjust_config_region(struct virtio_pci_softc *, int offset); uint8_t virtio_pci_read_device_config_1(struct virtio_softc *, int); uint16_t virtio_pci_read_device_config_2(struct virtio_softc *, int); uint32_t virtio_pci_read_device_config_4(struct virtio_softc *, int); @@ -81,9 +81,10 @@ int virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_fe int virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *); void virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t); void virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t); -int virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, int (*)(void *), void *); +int virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, struct cpu_info *, int (*)(void *), void *); int virtio_pci_setup_msix(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int); void virtio_pci_intr_barrier(struct virtio_softc *); +int virtio_pci_intr_establish(struct virtio_softc *, struct virtio_attach_args *, int, struct cpu_info *, int (*)(void *), void *); void virtio_pci_free_irqs(struct virtio_pci_softc *); int virtio_pci_poll_intr(void *); int virtio_pci_legacy_intr(void *); @@ -100,6 +101,7 @@ enum irq_type { IRQ_NO_MSIX, IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */ IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */ + IRQ_MSIX_CHILD, /* assigned by child driver */ }; struct virtio_pci_intr { @@ -179,6 +181,7 @@ const struct virtio_ops virtio_pci_ops = 
{ virtio_pci_attach_finish, virtio_pci_poll_intr, virtio_pci_intr_barrier, + virtio_pci_intr_establish, }; static inline uint64_t @@ -426,7 +429,7 @@ virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int bu printf("%s: cap too large\n", __func__); return ERANGE; } - for (i = 4; i < len / sizeof(pcireg_t); i++) + for (i = 4; i < len / sizeof(pcireg_t); i++) v->reg[i] = pci_conf_read(pc, tag, offset + i * 4); } @@ -648,10 +651,12 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux) goto free; } - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI; sc->sc_irq_type = IRQ_NO_MSIX; - if (virtio_pci_adjust_config_region(sc) != 0) - goto err; + if (virtio_pci_adjust_config_region(sc, + VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI) != 0) + { + goto free; + } virtio_device_reset(vsc); virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK); @@ -692,7 +697,9 @@ virtio_pci_attach_finish(struct virtio_softc *vsc, pci_chipset_tag_t pc = vpa->vpa_pa->pa_pc; char const *intrstr; - if (virtio_pci_setup_msix(sc, vpa, 0) == 0) { + if (sc->sc_irq_type == IRQ_MSIX_CHILD) { + intrstr = "msix"; + } else if (virtio_pci_setup_msix(sc, vpa, 0) == 0) { sc->sc_irq_type = IRQ_MSIX_PER_VQ; intrstr = "msix per-VQ"; } else if (virtio_pci_setup_msix(sc, vpa, 1) == 0) { @@ -754,11 +761,14 @@ virtio_pci_detach(struct device *self, int flags) } int -virtio_pci_adjust_config_region(struct virtio_pci_softc *sc) +virtio_pci_adjust_config_region(struct virtio_pci_softc *sc, int offset) { if (sc->sc_sc.sc_version_1) return 0; - sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset; + if (sc->sc_devcfg_offset == offset) + return 0; + sc->sc_devcfg_offset = offset; + sc->sc_devcfg_iosize = sc->sc_iosize - offset; sc->sc_devcfg_iot = sc->sc_iot; if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset, sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) { @@ -958,30 +968,33 @@ virtio_pci_write_device_config_8(struct virtio_softc *vsc, int 
virtio_pci_msix_establish(struct virtio_pci_softc *sc, - struct virtio_pci_attach_args *vpa, int idx, + struct virtio_pci_attach_args *vpa, int idx, struct cpu_info *ci, int (*handler)(void *), void *ih_arg) { struct virtio_softc *vsc = &sc->sc_sc; pci_intr_handle_t ih; + int r; KASSERT(idx < sc->sc_nintr); - if (pci_intr_map_msix(vpa->vpa_pa, idx, &ih) != 0) { + r = pci_intr_map_msix(vpa->vpa_pa, idx, &ih); + if (r != 0) { #if VIRTIO_DEBUG printf("%s[%d]: pci_intr_map_msix failed\n", vsc->sc_dev.dv_xname, idx); #endif - return 1; + return r; } snprintf(sc->sc_intr[idx].name, sizeof(sc->sc_intr[idx].name), "%s:%d", vsc->sc_child->dv_xname, idx); - sc->sc_intr[idx].ih = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl, - handler, ih_arg, sc->sc_intr[idx].name); + sc->sc_intr[idx].ih = pci_intr_establish_cpu(sc->sc_pc, ih, vsc->sc_ipl, + ci, handler, ih_arg, sc->sc_intr[idx].name); if (sc->sc_intr[idx].ih == NULL) { printf("%s[%d]: couldn't establish msix interrupt\n", - vsc->sc_dev.dv_xname, idx); - return 1; + vsc->sc_child->dv_xname, idx); + return ENOMEM; } + virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_MSI); return 0; } @@ -1031,8 +1044,8 @@ virtio_pci_free_irqs(struct virtio_pci_softc *sc) } } - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI; - virtio_pci_adjust_config_region(sc); + /* XXX msix_delroute does not unset PCI_MSIX_MC_MSIXE -> leave alone? 
*/ + virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI); } int @@ -1040,34 +1053,33 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc, struct virtio_pci_attach_args *vpa, int shared) { struct virtio_softc *vsc = &sc->sc_sc; - int i; + int i, r = 0; /* Shared needs config + queue */ if (shared && vpa->vpa_va.va_nintr < 1 + 1) - return 1; + return ERANGE; /* Per VQ needs config + N * queue */ if (!shared && vpa->vpa_va.va_nintr < 1 + vsc->sc_nvqs) - return 1; + return ERANGE; - if (virtio_pci_msix_establish(sc, vpa, 0, virtio_pci_config_intr, vsc)) - return 1; - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI; - virtio_pci_adjust_config_region(sc); + r = virtio_pci_msix_establish(sc, vpa, 0, NULL, virtio_pci_config_intr, vsc); + if (r != 0) + return r; if (shared) { - if (virtio_pci_msix_establish(sc, vpa, 1, - virtio_pci_shared_queue_intr, vsc)) { + r = virtio_pci_msix_establish(sc, vpa, 1, NULL, + virtio_pci_shared_queue_intr, vsc); + if (r != 0) goto fail; - } for (i = 0; i < vsc->sc_nvqs; i++) vsc->sc_vqs[i].vq_intr_vec = 1; } else { for (i = 0; i < vsc->sc_nvqs; i++) { - if (virtio_pci_msix_establish(sc, vpa, i + 1, - virtio_pci_queue_intr, &vsc->sc_vqs[i])) { + r = virtio_pci_msix_establish(sc, vpa, i + 1, NULL, + virtio_pci_queue_intr, &vsc->sc_vqs[i]); + if (r != 0) goto fail; - } vsc->sc_vqs[i].vq_intr_vec = i + 1; } } @@ -1075,7 +1087,28 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc, return 0; fail: virtio_pci_free_irqs(sc); - return 1; + return r; +} + +int +virtio_pci_intr_establish(struct virtio_softc *vsc, + struct virtio_attach_args *va, int vec, struct cpu_info *ci, + int (*func)(void *), void *arg) +{ + struct virtio_pci_attach_args *vpa; + struct virtio_pci_softc *sc; + + if (vsc->sc_ops != &virtio_pci_ops) + return ENXIO; + + vpa = (struct virtio_pci_attach_args *)va; + sc = (struct virtio_pci_softc *)vsc; + + if (vec >= sc->sc_nintr || sc->sc_nintr <= 1) + return ERANGE; + + sc->sc_irq_type = IRQ_MSIX_CHILD; + 
return virtio_pci_msix_establish(sc, vpa, vec, ci, func, arg); } void diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c index 3c532da0d..eeebcd2cc 100644 --- a/sys/dev/pv/if_vio.c +++ b/sys/dev/pv/if_vio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_vio.c,v 1.63 2025/01/06 14:23:52 sf Exp $ */ +/* $OpenBSD: if_vio.c,v 1.66 2025/01/14 14:32:32 sf Exp $ */ /* * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg. @@ -32,8 +32,10 @@ #include #include #include +#include #include #include +#include /* for CACHELINESIZE */ #include #include @@ -64,14 +66,21 @@ * if_vioreg.h: */ /* Configuration registers */ -#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */ -#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */ +#define VIRTIO_NET_CONFIG_MAC 0 /* 8 bit x 6 byte */ +#define VIRTIO_NET_CONFIG_STATUS 6 /* 16 bit */ +#define VIRTIO_NET_CONFIG_MAX_QUEUES 8 /* 16 bit */ +#define VIRTIO_NET_CONFIG_MTU 10 /* 16 bit */ +#define VIRTIO_NET_CONFIG_SPEED 12 /* 32 bit */ +#define VIRTIO_NET_CONFIG_DUPLEX 16 /* 8 bit */ +#define VIRTIO_NET_CONFIG_RSS_SIZE 17 /* 8 bit */ +#define VIRTIO_NET_CONFIG_RSS_LEN 18 /* 16 bit */ +#define VIRTIO_NET_CONFIG_HASH_TYPES 20 /* 16 bit */ /* Feature bits */ #define VIRTIO_NET_F_CSUM (1ULL<<0) #define VIRTIO_NET_F_GUEST_CSUM (1ULL<<1) -#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS (1ULL<<2) -#define VIRTIO_NET_F_MTU (1ULL<<3) +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS (1ULL<<2) +#define VIRTIO_NET_F_MTU (1ULL<<3) #define VIRTIO_NET_F_MAC (1ULL<<5) #define VIRTIO_NET_F_GSO (1ULL<<6) #define VIRTIO_NET_F_GUEST_TSO4 (1ULL<<7) @@ -183,6 +192,11 @@ struct virtio_net_ctrl_cmd { # define VIRTIO_NET_CTRL_VLAN_ADD 0 # define VIRTIO_NET_CTRL_VLAN_DEL 1 +#define VIRTIO_NET_CTRL_MQ 4 +# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 +# define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 +# define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2 + #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 # define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 @@ -196,6 +210,12 @@ struct virtio_net_ctrl_rx { uint8_t onoff; } __packed; +struct 
virtio_net_ctrl_mq_pairs_set { + uint16_t virtqueue_pairs; +}; +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + struct virtio_net_ctrl_guest_offloads { uint64_t offloads; } __packed; @@ -231,7 +251,7 @@ struct vio_queue { struct virtqueue *viq_txvq; struct mutex viq_txmtx, viq_rxmtx; int viq_txfree_slots; -}; +} __aligned(CACHELINESIZE); struct vio_softc { struct device sc_dev; @@ -251,14 +271,16 @@ struct vio_softc { caddr_t sc_dma_kva; int sc_hdr_size; - struct virtio_net_ctrl_cmd *sc_ctrl_cmd; - struct virtio_net_ctrl_status *sc_ctrl_status; - struct virtio_net_ctrl_rx *sc_ctrl_rx; - struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; - struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; + struct virtio_net_ctrl_cmd *sc_ctrl_cmd; + struct virtio_net_ctrl_status *sc_ctrl_status; + struct virtio_net_ctrl_rx *sc_ctrl_rx; + struct virtio_net_ctrl_mq_pairs_set *sc_ctrl_mq_pairs; + struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; + struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc - struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; + struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; + struct intrmap *sc_intrmap; struct vio_queue *sc_q; uint16_t sc_nqueues; int sc_tx_slots_per_req; @@ -317,10 +339,15 @@ void vio_tx_drain(struct vio_softc *); int vio_encap(struct vio_queue *, int, struct mbuf *); void vio_txtick(void *); +int vio_queue_intr(void *); +int vio_config_intr(void *); +int vio_ctrl_intr(void *); + /* other control */ void vio_link_state(struct ifnet *); int vio_config_change(struct virtio_softc *); int vio_ctrl_rx(struct vio_softc *, int, int); +int vio_ctrl_mq(struct vio_softc *); int vio_ctrl_guest_offloads(struct vio_softc *, uint64_t); int vio_set_rx_filter(struct vio_softc *); void vio_iff(struct vio_softc *); @@ -408,6 +435,8 @@ vio_free_dmamem(struct vio_softc *sc) * sc_ctrl_status: return value for a command via ctrl vq (READ) * 
sc_ctrl_rx: parameter for a VIRTIO_NET_CTRL_RX class command * (WRITE) + * sc_ctrl_mq_pairs: set number of rx/tx queue pairs (WRITE) + * sc_ctrl_guest_offloads: configure offload features (WRITE) * sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC * class command (WRITE) * sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC @@ -449,6 +478,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments) allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1; allocsize += sizeof(struct virtio_net_ctrl_status) * 1; allocsize += sizeof(struct virtio_net_ctrl_rx) * 1; + allocsize += sizeof(struct virtio_net_ctrl_mq_pairs_set) * 1; allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1; allocsize += VIO_CTRL_MAC_INFO_SIZE; } @@ -474,6 +504,8 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments) offset += sizeof(*sc->sc_ctrl_status); sc->sc_ctrl_rx = (void *)(kva + offset); offset += sizeof(*sc->sc_ctrl_rx); + sc->sc_ctrl_mq_pairs = (void *)(kva + offset); + offset += sizeof(*sc->sc_ctrl_mq_pairs); sc->sc_ctrl_guest_offloads = (void *)(kva + offset); offset += sizeof(*sc->sc_ctrl_guest_offloads); sc->sc_ctrl_mac_tbl_uc = (void *)(kva + offset); @@ -598,7 +630,7 @@ vio_attach(struct device *parent, struct device *self, void *aux) struct vio_softc *sc = (struct vio_softc *)self; struct virtio_softc *vsc = (struct virtio_softc *)parent; struct virtio_attach_args *va = aux; - int i, tx_max_segments; + int i, r, tx_max_segments; struct ifnet *ifp = &sc->sc_ac.ac_if; if (vsc->sc_child != NULL) { @@ -616,6 +648,9 @@ vio_attach(struct device *parent, struct device *self, void *aux) VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM | VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM; + if (va->va_nintr > 3 && ncpus > 1) + vsc->sc_driver_features |= VIRTIO_NET_F_MQ; + vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4; vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6; @@ -626,10 +661,23 @@ vio_attach(struct device *parent, struct
device *self, void *aux) if (virtio_negotiate_features(vsc, virtio_net_feature_names) != 0) goto err; - sc->sc_nqueues = 1; - vsc->sc_nvqs = 2 * sc->sc_nqueues; - if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) - vsc->sc_nvqs++; + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { + i = virtio_read_device_config_2(vsc, + VIRTIO_NET_CONFIG_MAX_QUEUES); + vsc->sc_nvqs = 2 * i + 1; + i = MIN(i, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX); + sc->sc_intrmap = intrmap_create(&sc->sc_dev, i, + va->va_nintr - 2, 0); + sc->sc_nqueues = intrmap_count(sc->sc_intrmap); + printf(": %u queue%s", sc->sc_nqueues, + sc->sc_nqueues > 1 ? "s" : ""); + } else { + sc->sc_nqueues = 1; + printf(": 1 queue"); + vsc->sc_nvqs = 2; + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) + vsc->sc_nvqs++; + } vsc->sc_vqs = mallocarray(vsc->sc_nvqs, sizeof(*vsc->sc_vqs), M_DEVBUF, M_WAITOK|M_ZERO); @@ -729,18 +777,66 @@ vio_attach(struct device *parent, struct device *self, void *aux) else virtio_stop_vq_intr(vsc, vioq->viq_txvq); vioq->viq_txfree_slots = vioq->viq_txvq->vq_num - 1; + KASSERT(vioq->viq_txfree_slots > sc->sc_tx_slots_per_req); + if (vioq->viq_txvq->vq_num != sc->sc_q[0].viq_txvq->vq_num) { + printf("inequal tx queue size %d: %d != %d\n", i, + vioq->viq_txvq->vq_num, + sc->sc_q[0].viq_txvq->vq_num); + goto err; + } + DPRINTF("%d: q %p rx %p tx %p\n", i, vioq, vioq->viq_rxvq, + vioq->viq_txvq); + + if (sc->sc_intrmap != NULL) { + vioq->viq_rxvq->vq_intr_vec = i + 2; + vioq->viq_txvq->vq_intr_vec = i + 2; + } } /* control queue */ if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) { - sc->sc_ctl_vq = &vsc->sc_vqs[2]; - if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, 2, 1, - "control") != 0) + i = 2; + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { + i = 2 * virtio_read_device_config_2(vsc, + VIRTIO_NET_CONFIG_MAX_QUEUES); + } + sc->sc_ctl_vq = &vsc->sc_vqs[i]; + if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, i, 1, "control") != 0) goto err; sc->sc_ctl_vq->vq_done = vio_ctrleof; + if (sc->sc_intrmap != NULL) 
+ sc->sc_ctl_vq->vq_intr_vec = 1; virtio_start_vq_intr(vsc, sc->sc_ctl_vq); } + if (sc->sc_intrmap) { + r = virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr, + vsc); + if (r != 0) { + printf("%s: cannot alloc config intr: %d\n", + sc->sc_dev.dv_xname, r); + goto err; + } + r = virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr, + sc->sc_ctl_vq); + if (r != 0) { + printf("%s: cannot alloc ctrl intr: %d\n", + sc->sc_dev.dv_xname, r); + goto err; + } + for (i = 0; i < sc->sc_nqueues; i++) { + struct cpu_info *ci = NULL; + ci = intrmap_cpu(sc->sc_intrmap, i); + r = virtio_intr_establish(vsc, va, i + 2, ci, + vio_queue_intr, &sc->sc_q[i]); + if (r != 0) { + printf("%s: cannot alloc q%d intr: %d\n", + sc->sc_dev.dv_xname, i, r); + goto err; + } + } + } + if (vio_alloc_mem(sc, tx_max_segments) < 0) goto err; @@ -760,6 +856,11 @@ vio_attach(struct device *parent, struct device *self, void *aux) if (virtio_attach_finish(vsc, va) != 0) goto err; + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { + /* ctrl queue works only after DRIVER_OK */ + vio_ctrl_mq(sc); + } + if_attach(ifp); ether_ifattach(ifp); vio_link_state(ifp); @@ -805,6 +906,33 @@ vio_link_state(struct ifnet *ifp) } } +/* interrupt handlers for multi-queue */ +int +vio_queue_intr(void *arg) +{ + struct vio_queue *vioq = arg; + struct virtio_softc *vsc = vioq->viq_sc->sc_virtio; + int r; + r = virtio_check_vq(vsc, vioq->viq_txvq); + r |= virtio_check_vq(vsc, vioq->viq_rxvq); + return r; +} + +int +vio_config_intr(void *arg) +{ + struct virtio_softc *vsc = arg; + return vio_config_change(vsc); +} + +int +vio_ctrl_intr(void *arg) +{ + struct virtqueue *vq = arg; + return virtio_check_vq(vq->vq_owner, vq); +} + + int vio_config_change(struct virtio_softc *vsc) { @@ -913,6 +1041,8 @@ vio_stop(struct ifnet *ifp, int disable) if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) virtio_start_vq_intr(vsc, sc->sc_ctl_vq); virtio_reinit_end(vsc); + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) + vio_ctrl_mq(sc); if 
(virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) vio_ctrl_wakeup(sc, FREE); } @@ -1137,6 +1267,33 @@ vio_dump(struct vio_softc *sc) } #endif +static int +vio_rxr_info(struct vio_softc *sc, struct if_rxrinfo *ifri) +{ + struct if_rxring_info *ifrs, *ifr; + int error; + unsigned int i; + + ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs), + M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL); + if (ifrs == NULL) + return (ENOMEM); + + for (i = 0; i < sc->sc_nqueues; i++) { + ifr = &ifrs[i]; + + ifr->ifr_size = sc->sc_rx_mbuf_size; + snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i); + ifr->ifr_info = sc->sc_q[i].viq_rxring; + } + + error = if_rxr_info_ioctl(ifri, i, ifrs); + + free(ifrs, M_TEMP, i * sizeof(*ifrs)); + + return (error); +} + int vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { @@ -1171,8 +1328,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; case SIOCGIFRXR: - r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data, - NULL, sc->sc_rx_mbuf_size, &sc->sc_q[0].viq_rxring); + r = vio_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data); break; default: r = ether_ioctl(ifp, &sc->sc_ac, cmd, data); @@ -1666,6 +1822,8 @@ vio_ctrl_submit(struct vio_softc *sc, int slot) vio_ctrl_wakeup(sc, RESET); return ENXIO; } + if (cold) + virtio_check_vq(sc->sc_virtio, sc->sc_ctl_vq); } VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, @@ -1723,6 +1881,41 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff) return r; } +/* issue a VIRTIO_NET_CTRL_MQ class command and wait for completion */ +int +vio_ctrl_mq(struct vio_softc *sc) +{ + struct virtio_softc *vsc = sc->sc_virtio; + struct virtqueue *vq = sc->sc_ctl_vq; + int r, slot; + + + r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_MQ, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, 1, &slot); + if (r != 0) + return r; + + sc->sc_ctrl_mq_pairs->virtqueue_pairs = sc->sc_nqueues; + + vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mq_pairs, + sizeof(*sc->sc_ctrl_mq_pairs), 1); + + r = 
vio_ctrl_submit(sc, slot); + + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mq_pairs, + sizeof(*sc->sc_ctrl_mq_pairs), BUS_DMASYNC_POSTWRITE); + + if (r != 0) + printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET); + + DPRINTF("%s: cmd %d %d: %d\n", __func__, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, sc->sc_nqueues, r); + + vio_ctrl_finish(sc); + return r; +} + int vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features) { @@ -1745,7 +1938,7 @@ vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features) VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads, sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE); - if (r != 0) { + if (r != 0 && features != 0) { printf("%s: offload features 0x%llx failed\n", sc->sc_dev.dv_xname, features); } diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h index c4f35a9f1..029f57438 100644 --- a/sys/dev/pv/virtiovar.h +++ b/sys/dev/pv/virtiovar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: virtiovar.h,v 1.27 2025/01/09 10:55:22 sf Exp $ */ +/* $OpenBSD: virtiovar.h,v 1.28 2025/01/14 12:30:57 sf Exp $ */ /* $NetBSD: virtiovar.h,v 1.1 2011/10/30 12:12:21 hannken Exp $ */ /* @@ -165,6 +165,8 @@ struct virtio_ops { int (*attach_finish)(struct virtio_softc *, struct virtio_attach_args *); int (*poll_intr)(void *); void (*intr_barrier)(struct virtio_softc *); + int (*intr_establish)(struct virtio_softc *, struct virtio_attach_args *, + int, struct cpu_info *, int (*)(void *), void *); }; #define VIRTIO_CHILD_ERROR ((void*)1) @@ -208,6 +210,14 @@ struct virtio_softc { #define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i) #define virtio_intr_barrier(sc) (sc)->sc_ops->intr_barrier(sc) +/* + * virtio_intr_establish() only works if va_nintr > 1. If it is called by a + * child driver, the transport driver will skip automatic intr allocation and + * the child driver must allocate all required interrupts itself. Vector 0 is + * always used for the config change interrupt. 
+ */ +#define virtio_intr_establish(sc, va, v, ci, fn, a) (sc)->sc_ops->intr_establish(sc, va, v, ci, fn, a) + /* only for transport drivers */ #define virtio_device_reset(sc) virtio_set_status((sc), 0) diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index dd90a719f..b95157c6d 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_malloc.c,v 1.152 2024/06/26 01:40:49 jsg Exp $ */ +/* $OpenBSD: kern_malloc.c,v 1.153 2025/01/14 18:37:51 mvs Exp $ */ /* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */ /* @@ -50,6 +50,11 @@ #include #endif +/* + * Locks used to protect data: + * I Immutable data + */ + static #ifndef SMALL_KERNEL __inline__ @@ -95,12 +100,11 @@ struct kmemstats kmemstats[M_LAST]; #endif struct kmemusage *kmemusage; char *kmembase, *kmemlimit; -char buckstring[16 * sizeof("123456,")]; +char buckstring[16 * sizeof("123456,")]; /* [I] */ int buckstring_init = 0; #if defined(KMEMSTATS) || defined(DIAGNOSTIC) char *memname[] = INITKMEMNAMES; -char *memall = NULL; -struct rwlock sysctl_kmemlock = RWLOCK_INITIALIZER("sysctlklk"); +char *memall; /* [I] */ #endif /* @@ -540,6 +544,10 @@ kmeminit(void) vaddr_t base, limit; long indx; +#if defined(KMEMSTATS) || defined(DIAGNOSTIC) + int i, siz, totlen; +#endif + #ifdef DIAGNOSTIC if (sizeof(struct kmem_freelist) > (1 << MINBUCKET)) panic("kmeminit: minbucket too small/struct freelist too big"); @@ -577,6 +585,38 @@ kmeminit(void) for (indx = 0; indx < M_LAST; indx++) kmemstats[indx].ks_limit = (long)nkmempages * PAGE_SIZE * 6 / 10; + + memset(buckstring, 0, sizeof(buckstring)); + for (siz = 0, i = MINBUCKET; i < MINBUCKET + 16; i++) { + snprintf(buckstring + siz, sizeof buckstring - siz, + "%d,", (u_int)(1< 0 @@ -454,9 +457,6 @@ kern_sysctl_dirs_locked(int top_name, int *name, u_int namelen, return (sysctl_doprof(name, namelen, oldp, oldlenp, newp, newlen)); #endif - case KERN_MALLOCSTATS: - return (sysctl_malloc(name, namelen, oldp, 
oldlenp, - newp, newlen, p)); case KERN_TTY: return (sysctl_tty(name, namelen, oldp, oldlenp, newp, newlen)); diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index e5c040ca1..762f075f0 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_timer.c,v 1.80 2025/01/05 12:18:48 bluhm Exp $ */ +/* $OpenBSD: tcp_timer.c,v 1.81 2025/01/14 13:49:44 bluhm Exp $ */ /* $NetBSD: tcp_timer.c,v 1.14 1996/02/13 23:44:09 christos Exp $ */ /* @@ -106,6 +106,36 @@ tcp_timer_init(void) tcp_delack_msecs = TCP_DELACK_MSECS; } +static inline int +tcp_timer_enter(struct inpcb *inp, struct socket **so, struct tcpcb **tp, + u_int timer) +{ + KASSERT(timer < TCPT_NTIMERS); + + NET_LOCK_SHARED(); + *so = in_pcbsolock_ref(inp); + if (*so == NULL) { + *tp = NULL; + return -1; + } + *tp = intotcpcb(inp); + /* Ignore canceled timeouts or timeouts that have been rescheduled. */ + if (*tp == NULL || !ISSET((*tp)->t_flags, TF_TIMER << timer) || + timeout_pending(&(*tp)->t_timer[timer])) + return -1; + CLR((*tp)->t_flags, TF_TIMER << timer); + + return 0; +} + +static inline void +tcp_timer_leave(struct inpcb *inp, struct socket *so) +{ + in_pcbsounlock_rele(inp, so); + NET_UNLOCK_SHARED(); + in_pcbunref(inp); +} + /* * Callout to process delayed ACKs for a TCPCB. */ @@ -113,6 +143,7 @@ void tcp_timer_delack(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; short ostate; @@ -121,15 +152,10 @@ tcp_timer_delack(void *arg) * for whatever reason, it will restart the delayed * ACK callout. */ - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. 
*/ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_DELACK) || - timeout_pending(&tp->t_timer[TCPT_DELACK])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_DELACK)) goto out; - CLR(tp->t_flags, TF_TMR_DELACK); - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -138,8 +164,7 @@ tcp_timer_delack(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_DELACK, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } /* @@ -199,19 +224,15 @@ void tcp_timer_rexmt(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; - uint32_t rto; short ostate; + uint32_t rto; - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_REXMT) || - timeout_pending(&tp->t_timer[TCPT_REXMT])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_REXMT)) goto out; - CLR(tp->t_flags, TF_TMR_REXMT); - if ((tp->t_flags & TF_PMTUD_PEND) && inp && + if ((tp->t_flags & TF_PMTUD_PEND) && SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && SEQ_LT(tp->t_pmtud_th_seq, (int)(tp->snd_una + tp->t_maxseg))) { struct sockaddr_in sin; @@ -249,7 +270,7 @@ tcp_timer_rexmt(void *arg) tp->t_softerror : ETIMEDOUT); goto out; } - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -270,13 +291,13 @@ tcp_timer_rexmt(void *arg) * lots more sophisticated searching to find the right * value here... 
*/ - if (ip_mtudisc && inp && + if (ip_mtudisc && TCPS_HAVEESTABLISHED(tp->t_state) && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { struct rtentry *rt = NULL; /* No data to send means path mtu is not a problem */ - if (!inp->inp_socket->so_snd.sb_cc) + if (!READ_ONCE(so->so_snd.sb_cc)) goto leave; rt = in_pcbrtentry(inp); @@ -391,31 +412,26 @@ tcp_timer_rexmt(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_REXMT, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } void tcp_timer_persist(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; - uint32_t rto; short ostate; uint64_t now; + uint32_t rto; - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. */ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_PERSIST) || - timeout_pending(&tp->t_timer[TCPT_PERSIST])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_PERSIST)) goto out; - CLR(tp->t_flags, TF_TMR_PERSIST); if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) goto out; - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -445,26 +461,21 @@ tcp_timer_persist(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_PERSIST, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } void tcp_timer_keep(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; short ostate; - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. 
*/ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_KEEP) || - timeout_pending(&tp->t_timer[TCPT_KEEP])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_KEEP)) goto out; - CLR(tp->t_flags, TF_TMR_KEEP); - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -475,7 +486,7 @@ tcp_timer_keep(void *arg) goto out; } if ((atomic_load_int(&tcp_always_keepalive) || - inp->inp_socket->so_options & SO_KEEPALIVE) && + so->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { int maxidle; uint64_t now; @@ -509,28 +520,23 @@ tcp_timer_keep(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_KEEP, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } void tcp_timer_2msl(void *arg) { struct inpcb *inp = arg; + struct socket *so; struct tcpcb *otp = NULL, *tp; short ostate; - int maxidle; uint64_t now; + int maxidle; - NET_LOCK(); - tp = intotcpcb(inp); - /* Ignore canceled timeouts or timeouts that have been rescheduled. 
*/ - if (tp == NULL || !ISSET(tp->t_flags, TF_TMR_2MSL) || - timeout_pending(&tp->t_timer[TCPT_2MSL])) + if (tcp_timer_enter(inp, &so, &tp, TCPT_2MSL)) goto out; - CLR(tp->t_flags, TF_TMR_2MSL); - if (inp->inp_socket->so_options & SO_DEBUG) { + if (so->so_options & SO_DEBUG) { otp = tp; ostate = tp->t_state; } @@ -546,8 +552,7 @@ tcp_timer_2msl(void *arg) if (otp) tcp_trace(TA_TIMER, ostate, tp, otp, NULL, TCPT_2MSL, 0); out: - NET_UNLOCK(); - in_pcbunref(inp); + tcp_timer_leave(inp, so); } void diff --git a/usr.sbin/bgpd/rde_rib.c b/usr.sbin/bgpd/rde_rib.c index 3477d6864..ed410ec09 100644 --- a/usr.sbin/bgpd/rde_rib.c +++ b/usr.sbin/bgpd/rde_rib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: rde_rib.c,v 1.266 2024/12/12 20:19:03 claudio Exp $ */ +/* $OpenBSD: rde_rib.c,v 1.267 2025/01/14 12:24:23 claudio Exp $ */ /* * Copyright (c) 2003, 2004 Claudio Jeker @@ -1273,6 +1273,7 @@ prefix_adjout_update(struct prefix *p, struct rde_peer *peer, /* nothing changed */ p->validation_state = state->vstate; p->lastchange = getmonotime(); + p->flags &= ~PREFIX_FLAG_STALE; return; } @@ -1343,6 +1344,7 @@ prefix_adjout_withdraw(struct prefix *p) /* already a withdraw, shortcut */ if (p->flags & PREFIX_FLAG_WITHDRAW) { p->lastchange = getmonotime(); + p->flags &= ~PREFIX_FLAG_STALE; return; } /* pending update just got withdrawn */