From 8774a990ee4094f16d596d4b78e0f3239e5d0c88 Mon Sep 17 00:00:00 2001
From: Gleb Smirnoff <glebius@FreeBSD.org>
Date: Mon, 15 Dec 2025 12:50:35 -0800
Subject: [PATCH] bpf: modularize ifnet(9) part of bpf

Imagine that bpf(9) tapping can happen at any point in the network stack,
not necessarily at interface transmit or receive.  To achieve that we need
a thin layer of abstraction defined by struct bif_methods, that defines
how generic bpf layer works with a tap point of this kind.

Implement ifnet(9) specific methods in a separate file bpf_ifnet.c.  At
this point there is 100% compatibility for all existing interfaces, there
is no KPI change, yet.  The legacy attaching KPI is layered over new ifnet
agnostic KPI.  The new KPI may change though, as we can implement multiple
DLTs per single tap point in a prettier fashion.

The new abstraction layer allows us to move all the 802.11 radio injection
hacks out of bpf.c into ieee80211_radiotap.c, so do that immediately as a
good proof of concept.

Reviewed by:		bz
Differential Revision:	https://reviews.freebsd.org/D53872
---
 sys/conf/files                    |   1 +
 sys/net/bpf.c                     | 726 +++++++++++-------------------
 sys/net/bpf.h                     |  28 +-
 sys/net/bpf_ifnet.c               | 257 +++++++++++
 sys/net80211/ieee80211_freebsd.c  |  31 --
 sys/net80211/ieee80211_radiotap.c | 128 +++++-
 6 files changed, 656 insertions(+), 515 deletions(-)
 create mode 100644 sys/net/bpf_ifnet.c

diff --git a/sys/conf/files b/sys/conf/files
index df939f5788ca..aa0d05df45c7 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4189,6 +4189,7 @@ net/bpf_buffer.c		optional bpf
 net/bpf_jitter.c		optional bpf_jitter
 net/bpf_filter.c		optional bpf | netgraph_bpf
 net/bpf_zerocopy.c		optional bpf
+net/bpf_ifnet.c			optional bpf
 net/bridgestp.c			optional bridge | if_bridge
 net/dummymbuf.c			optional dummymbuf
 net/ieee8023ad_lacp.c		optional lagg
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index 855242728e16..0155b27b4919 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -41,7 +41,6 @@
 
 #include <sys/param.h>
 #include <sys/conf.h>
-#include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/ktr.h>
@@ -60,7 +59,6 @@
 #include <sys/sysent.h>
 #include <sys/systm.h>
 
-#include <sys/event.h>
 #include <sys/file.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
@@ -71,7 +69,6 @@
 #include <net/if_var.h>
 #include <net/if_private.h>
 #include <net/if_vlan_var.h>
-#include <net/if_dl.h>
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #ifdef BPF_JITTER
@@ -79,31 +76,25 @@
 #endif
 #include <net/bpf_zerocopy.h>
 #include <net/bpfdesc.h>
-#include <net/route.h>
 #include <net/vnet.h>
 
-#include <netinet/in.h>
-#include <netinet/if_ether.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
-#include <net80211/ieee80211_freebsd.h>
-
 #include <security/mac/mac_framework.h>
 
 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
 
-static const struct bpfd_list dead_bpf_if = CK_LIST_HEAD_INITIALIZER();
-
 struct bpf_if {
 	struct bpfd_list	bif_dlist;	/* list of all interfaces */
 	LIST_ENTRY(bpf_if)	bif_next;	/* descriptor list */
 	u_int		bif_dlt;	/* link layer type */
 	u_int		bif_hdrlen;	/* length of link header */
-	struct bpfd_list bif_wlist;	/* writer-only list */
-	struct ifnet	*bif_ifp;	/* corresponding interface */
-	struct bpf_if	**bif_bpf;	/* Pointer to pointer to us */
 	volatile u_int	bif_refcnt;
+	struct bpfd_list bif_wlist;	/* writer-only list */
+	const struct bif_methods	*bif_methods;
+	void		*bif_softc;
+	const char	*bif_name;
 	struct epoch_context epoch_ctx;
 };
 
@@ -111,6 +102,61 @@ struct bpf_if {
 _Static_assert(offsetof(struct bpf_if, bif_dlist) == 0,
     "bpf_if shall start with bif_dlist");
 
+static inline void
+bif_attachd(struct bpf_if *bp)
+{
+	if (bp->bif_methods->bif_attachd != NULL)
+		bp->bif_methods->bif_attachd(bp->bif_softc);
+}
+
+static inline void
+bif_detachd(struct bpf_if *bp)
+{
+	if (bp->bif_methods->bif_detachd != NULL)
+		bp->bif_methods->bif_detachd(bp->bif_softc);
+}
+
+static inline uint32_t
+bif_wrsize(struct bpf_if *bp)
+{
+	if (bp->bif_methods->bif_wrsize != NULL)
+		return (bp->bif_methods->bif_wrsize(bp->bif_softc));
+	else
+		return (0);
+}
+
+static inline int
+bif_promisc(struct bpf_if *bp, bool on)
+{
+	if (bp->bif_methods->bif_promisc != NULL)
+		return (bp->bif_methods->bif_promisc(bp->bif_softc, on));
+	else
+		return (0);
+}
+
+#ifdef MAC
+static inline int
+bif_mac_check_receive(struct bpf_if *bp, struct bpf_d *d)
+{
+	if (bp->bif_methods->bif_mac_check_receive != NULL)
+		return (bp->bif_methods->bif_mac_check_receive(bp->bif_softc,
+		    d));
+	else
+		return (0);
+}
+#endif
+
+/*
+ * XXXGL: Once we migrate to tapping KPI that would specify packet direction
+ * we no longer need bif_chkdir method.
+ */
+static inline bool
+bpf_chkdir(struct bpf_d *d, struct mbuf *m)
+{
+	return (d->bd_bif->bif_methods->bif_chkdir(d->bd_bif->bif_softc, m,
+	    d->bd_direction));
+}
+
 struct bpf_program_buffer {
 	struct epoch_context	epoch_ctx;
 #ifdef BPF_JITTER
@@ -184,12 +230,9 @@ static void	bpfif_rele(struct bpf_if *);
 
 static void	bpfd_ref(struct bpf_d *);
 static void	bpfd_rele(struct bpf_d *);
-static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
+static int	bpf_attachd(struct bpf_d *d, struct bpf_if *);
 static void	bpf_detachd(struct bpf_d *, bool);
 static void	bpfd_free(epoch_context_t);
-static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
-		    struct sockaddr *, int *, struct bpf_d *);
-static int	bpf_setif(struct bpf_d *, struct ifreq *);
 static void	bpf_timed_out(void *);
 static __inline void
 		bpf_wakeup(struct bpf_d *);
@@ -298,7 +341,6 @@ bpfif_free(epoch_context_t ctx)
 	struct bpf_if *bp;
 
 	bp = __containerof(ctx, struct bpf_if, epoch_ctx);
-	if_rele(bp->bif_ifp);
 	free(bp, M_BPF);
 }
 
@@ -552,221 +594,6 @@ bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
 	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
 }
 
-/*
- * General BPF functions.
- */
-static int
-bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
-    struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
-{
-	const struct ieee80211_bpf_params *p;
-	struct ether_header *eh;
-	struct mbuf *m;
-	int error;
-	int len;
-	int hlen;
-	int slen;
-
-	/*
-	 * Build a sockaddr based on the data link layer type.
-	 * We do this at this level because the ethernet header
-	 * is copied directly into the data field of the sockaddr.
-	 * In the case of SLIP, there is no header and the packet
-	 * is forwarded as is.
-	 * Also, we are careful to leave room at the front of the mbuf
-	 * for the link level header.
-	 */
-	switch (linktype) {
-	case DLT_SLIP:
-		sockp->sa_family = AF_INET;
-		hlen = 0;
-		break;
-
-	case DLT_EN10MB:
-		sockp->sa_family = AF_UNSPEC;
-		/* XXX Would MAXLINKHDR be better? */
-		hlen = ETHER_HDR_LEN;
-		break;
-
-	case DLT_FDDI:
-		sockp->sa_family = AF_IMPLINK;
-		hlen = 0;
-		break;
-
-	case DLT_RAW:
-		sockp->sa_family = AF_UNSPEC;
-		hlen = 0;
-		break;
-
-	case DLT_NULL:
-		/*
-		 * null interface types require a 4 byte pseudo header which
-		 * corresponds to the address family of the packet.
-		 */
-		sockp->sa_family = AF_UNSPEC;
-		hlen = 4;
-		break;
-
-	case DLT_ATM_RFC1483:
-		/*
-		 * en atm driver requires 4-byte atm pseudo header.
-		 * though it isn't standard, vpi:vci needs to be
-		 * specified anyway.
-		 */
-		sockp->sa_family = AF_UNSPEC;
-		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
-		break;
-
-	case DLT_PPP:
-		sockp->sa_family = AF_UNSPEC;
-		hlen = 4;	/* This should match PPP_HDRLEN */
-		break;
-
-	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
-		sockp->sa_family = AF_IEEE80211;
-		hlen = 0;
-		break;
-
-	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
-		sockp->sa_family = AF_IEEE80211;
-		sockp->sa_len = 12;	/* XXX != 0 */
-		hlen = sizeof(struct ieee80211_bpf_params);
-		break;
-
-	default:
-		return (EIO);
-	}
-
-	len = uio->uio_resid;
-	if (len < hlen || len - hlen > ifp->if_mtu)
-		return (EMSGSIZE);
-
-	/* Allocate a mbuf, up to MJUM16BYTES bytes, for our write. */
-	m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
-	if (m == NULL)
-		return (EIO);
-	m->m_pkthdr.len = m->m_len = len;
-	*mp = m;
-
-	error = uiomove(mtod(m, u_char *), len, uio);
-	if (error)
-		goto bad;
-
-	slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
-	if (slen == 0) {
-		error = EPERM;
-		goto bad;
-	}
-
-	/* Check for multicast destination */
-	switch (linktype) {
-	case DLT_EN10MB:
-		eh = mtod(m, struct ether_header *);
-		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
-			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
-			    ETHER_ADDR_LEN) == 0)
-				m->m_flags |= M_BCAST;
-			else
-				m->m_flags |= M_MCAST;
-		}
-		if (!(d->bd_flags & BPFD_HDRCMPLT)) {
-			memcpy(eh->ether_shost, IF_LLADDR(ifp),
-			    sizeof(eh->ether_shost));
-		}
-		break;
-	}
-
-	/*
-	 * Make room for link header, and copy it to sockaddr
-	 */
-	if (hlen != 0) {
-		if (sockp->sa_family == AF_IEEE80211) {
-			/*
-			 * Collect true length from the parameter header
-			 * NB: sockp is known to be zero'd so if we do a
-			 *     short copy unspecified parameters will be
-			 *     zero.
-			 * NB: packet may not be aligned after stripping
-			 *     bpf params
-			 * XXX check ibp_vers
-			 */
-			p = mtod(m, const struct ieee80211_bpf_params *);
-			hlen = p->ibp_len;
-			if (hlen > sizeof(sockp->sa_data)) {
-				error = EINVAL;
-				goto bad;
-			}
-		}
-		bcopy(mtod(m, const void *), sockp->sa_data, hlen);
-	}
-	*hdrlen = hlen;
-
-	return (0);
-bad:
-	m_freem(m);
-	return (error);
-}
-
-/*
- * Attach descriptor to the bpf interface, i.e. make d listen on bp,
- * then reset its buffers and counters with reset_d().
- */
-static void
-bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
-{
-	int op_w;
-
-	BPF_LOCK_ASSERT();
-
-	/*
-	 * Save sysctl value to protect from sysctl change
-	 * between reads
-	 */
-	op_w = V_bpf_optimize_writers || d->bd_writer;
-
-	if (d->bd_bif != NULL)
-		bpf_detachd(d, false);
-	/*
-	 * Point d at bp, and add d to the interface's list.
-	 * Since there are many applications using BPF for
-	 * sending raw packets only (dhcpd, cdpd are good examples)
-	 * we can delay adding d to the list of active listeners until
-	 * some filter is configured.
-	 */
-
-	BPFD_LOCK(d);
-	/*
-	 * Hold reference to bpif while descriptor uses this interface.
-	 */
-	bpfif_ref(bp);
-	d->bd_bif = bp;
-	if (op_w != 0) {
-		/* Add to writers-only list */
-		CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
-		/*
-		 * We decrement bd_writer on every filter set operation.
-		 * First BIOCSETF is done by pcap_open_live() to set up
-		 * snap length. After that appliation usually sets its own
-		 * filter.
-		 */
-		d->bd_writer = 2;
-	} else
-		CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
-
-	reset_d(d);
-
-	/* Trigger EVFILT_WRITE events. */
-	bpf_wakeup(d);
-
-	BPFD_UNLOCK(d);
-
-	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
-	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
-
-	if (op_w == 0)
-		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
-}
-
 /*
  * Check if we need to upgrade our descriptor @d from write-only mode.
  */
@@ -837,8 +664,7 @@ static void
 bpf_detachd(struct bpf_d *d, bool detached_ifp)
 {
 	struct bpf_if *bp;
-	struct ifnet *ifp;
-	int error;
+	bool writer;
 
 	BPF_LOCK_ASSERT();
 	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
@@ -848,11 +674,8 @@ bpf_detachd(struct bpf_d *d, bool detached_ifp)
 		return;
 
 	BPFD_LOCK(d);
-	/* Remove d from the interface's descriptor list. */
 	CK_LIST_REMOVE(d, bd_next);
-	/* Save bd_writer value */
-	error = d->bd_writer;
-	ifp = bp->bif_ifp;
+	writer = (d->bd_writer > 0);
 	d->bd_bif = NULL;
 	if (detached_ifp) {
 		/*
@@ -863,30 +686,14 @@ bpf_detachd(struct bpf_d *d, bool detached_ifp)
 	}
 	BPFD_UNLOCK(d);
 
-	/* Call event handler iff d is attached */
-	if (error == 0)
-		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+	if (!writer)
+		bif_detachd(bp);
 
-	/*
-	 * Check if this descriptor had requested promiscuous mode.
-	 * If so and ifnet is not detached, turn it off.
-	 */
 	if (d->bd_promisc && !detached_ifp) {
 		d->bd_promisc = 0;
-		CURVNET_SET(ifp->if_vnet);
-		error = ifpromisc(ifp, 0);
-		CURVNET_RESTORE();
-		if (error != 0 && error != ENXIO) {
-			/*
-			 * ENXIO can happen if a pccard is unplugged
-			 * Something is really wrong if we were able to put
-			 * the driver into promiscuous mode, but can't
-			 * take it out.
-			 */
-			if_printf(bp->bif_ifp,
-				"bpf_detach: ifpromisc failed (%d)\n", error);
-		}
+		(void)bif_promisc(bp, false);
 	}
+
 	bpfif_rele(bp);
 }
 
@@ -1164,128 +971,96 @@ bpf_ready(struct bpf_d *d)
 static int
 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
 {
-	struct route ro;
-	struct sockaddr dst;
 	struct epoch_tracker et;
 	struct bpf_if *bp;
 	struct bpf_d *d;
-	struct ifnet *ifp;
 	struct mbuf *m, *mc;
-	int error, hlen;
+	ssize_t len;
+	int error;
 
 	error = devfs_get_cdevpriv((void **)&d);
 	if (error != 0)
 		return (error);
 
-	NET_EPOCH_ENTER(et);
-	BPFD_LOCK(d);
-	BPF_PID_REFRESH_CUR(d);
-	counter_u64_add(d->bd_wcount, 1);
-	if ((bp = d->bd_bif) == NULL) {
-		error = ENXIO;
-		goto out_locked;
-	}
-
-	ifp = bp->bif_ifp;
-	if ((ifp->if_flags & IFF_UP) == 0) {
-		error = ENETDOWN;
-		goto out_locked;
-	}
-
 	if (uio->uio_resid == 0)
-		goto out_locked;
+		return (0);
 
-	bzero(&dst, sizeof(dst));
-	m = NULL;
-	hlen = 0;
-
-	/*
-	 * Take extra reference, unlock d and exit from epoch section,
-	 * since bpf_movein() can sleep.
-	 */
-	bpfd_ref(d);
-	NET_EPOCH_EXIT(et);
-	BPFD_UNLOCK(d);
-
-	error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
-	    &m, &dst, &hlen, d);
-
-	if (error != 0) {
+	BPFD_LOCK(d);
+	if ((bp = d->bd_bif) == NULL)
+		error = ENXIO;
+	else if (bp->bif_methods->bif_write == NULL)
+		error = EOPNOTSUPP;
+	if (error) {
+		BPFD_UNLOCK(d);
 		counter_u64_add(d->bd_wdcount, 1);
-		bpfd_rele(d);
 		return (error);
 	}
+	bpfd_ref(d);
+	BPFD_UNLOCK(d);
 
-	BPFD_LOCK(d);
-	/*
-	 * Check that descriptor is still attached to the interface.
-	 * This can happen on bpfdetach(). To avoid access to detached
-	 * ifnet, free mbuf and return ENXIO.
-	 */
-	if (d->bd_bif == NULL) {
-		counter_u64_add(d->bd_wdcount, 1);
-		BPFD_UNLOCK(d);
-		bpfd_rele(d);
-		m_freem(m);
-		return (ENXIO);
+	len = uio->uio_resid;
+	/* Allocate a mbuf, up to MJUM16BYTES bytes, for our write. */
+	m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
+	if (m == NULL) {
+		error = ENOMEM;
+		goto fail_wref;
+	}
+	m->m_pkthdr.len = m->m_len = len;
+
+	error = uiomove(mtod(m, u_char *), len, uio);
+	if (error)
+		goto fail_wref;
+
+	if (bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len) == 0) {
+		error = EPERM;
+		goto fail_wref;
 	}
-	counter_u64_add(d->bd_wfcount, 1);
-	if (d->bd_flags & BPFD_HDRCMPLT)
-		dst.sa_family = pseudo_AF_HDRCMPLT;
 
 	if (d->bd_flags & BPFD_FEEDBACK) {
-		mc = m_dup(m, M_NOWAIT);
-		if (mc != NULL)
-			mc->m_pkthdr.rcvif = ifp;
+		mc = m_dup(m, M_WAITOK);
 		/* Set M_PROMISC for outgoing packets to be discarded. */
 		if (d->bd_direction == BPF_D_INOUT)
 			m->m_flags |= M_PROMISC;
 	} else
 		mc = NULL;
 
-	m->m_pkthdr.len -= hlen;
-	m->m_len -= hlen;
-	m->m_data += hlen;	/* XXX */
+	/* XXXGL: should belong to bpf_ifnet.c */
+	if (d->bd_pcp != 0)
+		(void)vlan_set_pcp(m, d->bd_pcp);
 
-	CURVNET_SET(ifp->if_vnet);
+	BPFD_LOCK(d);
 #ifdef MAC
 	mac_bpfdesc_create_mbuf(d, m);
 	if (mc != NULL)
 		mac_bpfdesc_create_mbuf(d, mc);
 #endif
-
-	bzero(&ro, sizeof(ro));
-	if (hlen != 0) {
-		ro.ro_prepend = (u_char *)&dst.sa_data;
-		ro.ro_plen = hlen;
-		ro.ro_flags = RT_HAS_HEADER;
+	/*
+	 * Check that descriptor is still attached to the interface.
+	 * This can happen on bpfdetach() or if other thread did BIOCSDLT.
+	 */
+	if (__predict_false(d->bd_bif != bp)) {
+		BPFD_UNLOCK(d);
+		m_freem(mc);
+		error = ENXIO;
+		goto fail_wref;
 	}
-
-	if (d->bd_pcp != 0)
-		vlan_set_pcp(m, d->bd_pcp);
-
-	/* Avoid possible recursion on BPFD_LOCK(). */
-	NET_EPOCH_ENTER(et);
 	BPFD_UNLOCK(d);
-	error = (*ifp->if_output)(ifp, m, &dst, &ro);
+
+	NET_EPOCH_ENTER(et);
+	error = bp->bif_methods->bif_write(bp->bif_softc, m, mc, d->bd_flags);
+	NET_EPOCH_EXIT(et);
 	if (error)
 		counter_u64_add(d->bd_wdcount, 1);
-
-	if (mc != NULL) {
-		if (error == 0)
-			(*ifp->if_input)(ifp, mc);
-		else
-			m_freem(mc);
-	}
-	NET_EPOCH_EXIT(et);
-	CURVNET_RESTORE();
+	else
+		counter_u64_add(d->bd_wfcount, 1);
 	bpfd_rele(d);
+
 	return (error);
 
-out_locked:
+fail_wref:
 	counter_u64_add(d->bd_wdcount, 1);
-	NET_EPOCH_EXIT(et);
-	BPFD_UNLOCK(d);
+	bpfd_rele(d);
+	m_freem(m);
 	return (error);
 }
 
@@ -1506,8 +1281,9 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 			 */
 			error = EINVAL;
 		} else if (d->bd_promisc == 0) {
-			error = ifpromisc(d->bd_bif->bif_ifp, 1);
-			if (error == 0)
+			struct bpf_if *bp = d->bd_bif;
+
+			if ((error = bif_promisc(bp, true)) == 0)
 				d->bd_promisc = 1;
 		}
 		BPF_UNLOCK();
@@ -1579,10 +1355,10 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		if (d->bd_bif == NULL)
 			error = EINVAL;
 		else {
-			struct ifnet *const ifp = d->bd_bif->bif_ifp;
+			struct bpf_if *const bp = d->bd_bif;
 			struct ifreq *const ifr = (struct ifreq *)addr;
 
-			strlcpy(ifr->ifr_name, ifp->if_xname,
+			strlcpy(ifr->ifr_name, bp->bif_name,
 			    sizeof(ifr->ifr_name));
 		}
 		BPF_UNLOCK();
@@ -1591,7 +1367,10 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 	/*
 	 * Set interface.
 	 */
-	case BIOCSETIF:
+	case BIOCSETIF: {
+		struct ifreq *const ifr = (struct ifreq *)addr;
+		struct bpf_if *bp;
+
 		/*
 		 * Behavior here depends on the buffering model.  If we're
 		 * using kernel memory buffers, then we can allocate them here.
@@ -1611,10 +1390,21 @@ bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
 		} else
 			BPFD_UNLOCK(d);
 		BPF_LOCK();
-		error = bpf_setif(d, (struct ifreq *)addr);
+		/*
+		 * Look through attached interfaces for the named one.
+		 */
+		LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+			if (strncmp(ifr->ifr_name, bp->bif_name,
+			    sizeof(ifr->ifr_name)) == 0)
+				break;
+		}
+		if (bp != NULL)
+			error = bpf_attachd(d, bp);
+		else
+			error = ENXIO;
 		BPF_UNLOCK();
 		break;
-
+	}
 	/*
 	 * Set read timeout.
 	 */
@@ -2051,41 +1841,24 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 		NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx);
 
 	if (track_event)
-		EVENTHANDLER_INVOKE(bpf_track,
-		    d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);
+		bif_attachd(d->bd_bif);
 
 	BPF_UNLOCK();
 	return (0);
 }
 
 /*
- * Detach a file from its current interface (if attached at all) and attach
- * to the interface indicated by the name stored in ifr.
- * Return an errno or 0.
+ * Attach descriptor to a tap point, possibly detaching from the old one,
+ * reset the counters.
+ * XXXGL: this KPI is subject to change
  */
 static int
-bpf_setif(struct bpf_d *d, struct ifreq *ifr)
+bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
-	struct bpf_if *bp;
-	struct ifnet *theywant;
+	bool writer;
 
 	BPF_LOCK_ASSERT();
 
-	theywant = ifunit(ifr->ifr_name);
-	if (theywant == NULL)
-		return (ENXIO);
-	/*
-	 * Look through attached interfaces for the named one.
-	 */
-	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		if (bp->bif_ifp == theywant &&
-		    bp->bif_bpf == &theywant->if_bpf)
-			break;
-	}
-	if (bp == NULL)
-		return (ENXIO);
-
-	MPASS(bp == theywant->if_bpf);
 	/*
 	 * At this point, we expect the buffer is already allocated.  If not,
 	 * return an error.
@@ -2098,15 +1871,61 @@ bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 		break;
 
 	default:
-		panic("bpf_setif: bufmode %d", d->bd_bufmode);
+		panic("%s: bufmode %d", __func__, d->bd_bufmode);
 	}
-	if (bp != d->bd_bif)
-		bpf_attachd(d, bp);
-	else {
+
+	if (bp == d->bd_bif) {
 		BPFD_LOCK(d);
 		reset_d(d);
 		BPFD_UNLOCK(d);
-	}
+		return (0);
+	} else if (d->bd_bif != NULL)
+		bpf_detachd(d, false);
+
+	/*
+	 * Save sysctl value to protect from sysctl change between reads.
+	 */
+	writer = V_bpf_optimize_writers || (d->bd_writer > 0);
+
+	/*
+	 * Point d at bp, and add d to the interface's list.
+	 * Since there are many applications using BPF for
+	 * sending raw packets only (dhcpd, cdpd are good examples)
+	 * we can delay adding d to the list of active listeners until
+	 * some filter is configured.
+	 */
+	BPFD_LOCK(d);
+	/*
+	 * Hold reference to bpif while descriptor uses this interface.
+	 */
+	bpfif_ref(bp);
+	d->bd_bif = bp;
+	if (writer) {
+		/* Add to writers-only list */
+		CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+		/*
+		 * We decrement bd_writer on every filter set operation.
+		 * First BIOCSETF is done by pcap_open_live() to set up
+		 * snap length. After that application usually sets its own
+		 * filter.
+		 */
+		d->bd_writer = 2;
+	} else
+		CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+	reset_d(d);
+
+	/* Trigger EVFILT_WRITE events. */
+	bpf_wakeup(d);
+
+	BPFD_UNLOCK(d);
+
+	CTR3(KTR_NET, "%s: called by pid %d, adding to %s list",
+	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
+
+	if (!writer)
+		bif_attachd(bp);
+
 	return (0);
 }
 
@@ -2229,7 +2048,7 @@ filt_bpfwrite(struct knote *kn, long hint)
 		kn->kn_data = 0;
 		return (0);
 	} else {
-		kn->kn_data = d->bd_bif->bif_ifp->if_mtu;
+		kn->kn_data = bif_wrsize(d->bd_bif);
 		return (1);
 	}
 }
@@ -2305,7 +2124,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 		counter_u64_add(d->bd_rcount, 1);
 		/*
-		 * NB: We dont call BPF_CHECK_DIRECTION() here since there
+		 * NB: We don't check the direction here since there
 		 * is no way for the caller to indiciate to us whether this
 		 * packet is inbound or outbound. In the bpf_mtap() routines,
 		 * we use the interface pointers on the mbuf to figure it out.
@@ -2327,7 +2146,7 @@ bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
 				gottime = bpf_gettime(&bt, d->bd_tstamp,
 				    NULL);
 #ifdef MAC
-			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+			if (bif_mac_check_receive(bp, d) == 0)
 #endif
 				catchpacket(d, pkt, pktlen, slen,
 				    bpf_append_bytes, &bt);
@@ -2344,10 +2163,6 @@ bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen)
 		bpf_tap(ifp->if_bpf, pkt, pktlen);
 }
 
-#define	BPF_CHECK_DIRECTION(d, r, i)				\
-	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
-	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
-
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
  * Locking model is explained in bpf_tap().
@@ -2375,7 +2190,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 
 	NET_EPOCH_ENTER(et);
 	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-		if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
+		if (bpf_chkdir(d, m))
 			continue;
 		counter_u64_add(d->bd_rcount, 1);
 #ifdef BPF_JITTER
@@ -2394,7 +2209,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
 			if (gottime < bpf_ts_quality(d->bd_tstamp))
 				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
 #ifdef MAC
-			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+			if (bif_mac_check_receive(bp, d) == 0)
 #endif
 				catchpacket(d, (u_char *)m, pktlen, slen,
 				    bpf_append_mbuf, &bt);
@@ -2449,7 +2264,7 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 
 	NET_EPOCH_ENTER(et);
 	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
+		if (bpf_chkdir(d, m))
 			continue;
 		counter_u64_add(d->bd_rcount, 1);
 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
@@ -2460,7 +2275,7 @@ bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
 			if (gottime < bpf_ts_quality(d->bd_tstamp))
 				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
 #ifdef MAC
-			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
+			if (bif_mac_check_receive(bp, d) == 0)
 #endif
 				catchpacket(d, (u_char *)&mb, pktlen, slen,
 				    bpf_append_mbuf, &bt);
@@ -2479,7 +2294,6 @@ bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
 	}
 }
 
-#undef	BPF_CHECK_DIRECTION
 #undef	BPF_TSTAMP_NONE
 #undef	BPF_TSTAMP_FAST
 #undef	BPF_TSTAMP_NORMAL
@@ -2759,57 +2573,36 @@ bpfd_free(epoch_context_t ctx)
 }
 
 /*
- * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
- * fixed size of the link header (variable length headers not yet supported).
+ * Attach a tap point to bpf.
+ * XXX: with current KPI it is consumer's responsibility to avoid duplicates.
  */
-void
-bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
-{
-
-	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
-}
-
-/*
- * Attach an interface to bpf.  ifp is a pointer to the structure
- * defining the interface to be attached, dlt is the link layer type,
- * and hdrlen is the fixed size of the link header (variable length
- * headers are not yet supporrted).
- */
-void
-bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen,
-    struct bpf_if **driverp)
+struct bpf_if *
+bpf_attach(const char *name, u_int dlt, u_int hdrlen,
+    const struct bif_methods *methods, void *sc)
 {
 	struct bpf_if *bp;
 
-	KASSERT(*driverp == NULL,
-	    ("bpfattach2: driverp already initialized"));
-
 	bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
 
 	CK_LIST_INIT(&bp->bif_dlist);
 	CK_LIST_INIT(&bp->bif_wlist);
-	bp->bif_ifp = ifp;
 	bp->bif_dlt = dlt;
 	bp->bif_hdrlen = hdrlen;
-	bp->bif_bpf = driverp;
+	bp->bif_softc = sc;
+	bp->bif_name = name;
+	bp->bif_methods = methods;
 	refcount_init(&bp->bif_refcnt, 1);
-	*driverp = bp;
-	/*
-	 * Reference ifnet pointer, so it won't freed until
-	 * we release it.
-	 */
-	if_ref(ifp);
 	BPF_LOCK();
 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
 	BPF_UNLOCK();
 
-	if (bootverbose && IS_DEFAULT_VNET(curvnet))
-		if_printf(ifp, "bpf attached\n");
+	return (bp);
 }
 
 #ifdef VIMAGE
 /*
  * Detach descriptors on interface's vmove event.
+ * XXXGL: shouldn't be a special case, but a full detach.
  */
 void
 bpf_ifdetach(struct ifnet *ifp)
@@ -2819,7 +2612,8 @@ bpf_ifdetach(struct ifnet *ifp)
 
 	BPF_LOCK();
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		if (bp->bif_ifp != ifp)
+		/* XXXGL: assuming softc is ifnet here */
+		if (bp->bif_softc != ifp)
 			continue;
 
 		/* Detach common descriptors */
@@ -2837,40 +2631,31 @@ bpf_ifdetach(struct ifnet *ifp)
 #endif
 
 /*
- * Detach bpf from an interface. This involves detaching each descriptor
- * associated with the interface. Notify each descriptor as it's detached
- * so that any sleepers wake up and get ENXIO.
+ * Detach bpf tap point.  This involves detaching each descriptor associated
+ * with the interface.  Notify each descriptor as it's detached so that any
+ * sleepers wake up and get ENXIO.
  */
 void
-bpfdetach(struct ifnet *ifp)
+bpf_detach(struct bpf_if *bp)
 {
-	struct bpf_if *bp, *bp_temp;
 	struct bpf_d *d;
 
 	BPF_LOCK();
-	/* Find all bpf_if struct's which reference ifp and detach them. */
-	LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
-		if (ifp != bp->bif_ifp)
-			continue;
+	LIST_REMOVE(bp, bif_next);
 
-		LIST_REMOVE(bp, bif_next);
-		*bp->bif_bpf = __DECONST(struct bpf_if *, &dead_bpf_if);
+	CTR3(KTR_NET, "%s: sheduling free for encap %d for bp %p",
+	    __func__, bp->bif_dlt, bp);
 
-		CTR4(KTR_NET,
-		    "%s: sheduling free for encap %d (%p) for if %p",
-		    __func__, bp->bif_dlt, bp, ifp);
-
-		/* Detach common descriptors */
-		while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
-			bpf_detachd(d, true);
-		}
-
-		/* Detach writer-only descriptors */
-		while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
-			bpf_detachd(d, true);
-		}
-		bpfif_rele(bp);
+	/* Detach common descriptors */
+	while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
+		bpf_detachd(d, true);
 	}
+
+	/* Detach writer-only descriptors */
+	while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
+		bpf_detachd(d, true);
+	}
+	bpfif_rele(bp);
 	BPF_UNLOCK();
 }
 
@@ -2881,22 +2666,24 @@ bpf_peers_present_if(struct ifnet *ifp)
 }
 
 /*
- * Get a list of available data link type of the interface.
+ * Get a list of available data link type of the tap point.  If a tap point
+ * attaches more than one time, it is supposed to attach with different DLTs
+ * but with the same name pointer.
  */
 static int
 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 {
-	struct ifnet *ifp;
+	const char *name;
 	struct bpf_if *bp;
 	u_int *lst;
 	int error, n, n1;
 
 	BPF_LOCK_ASSERT();
 
-	ifp = d->bd_bif->bif_ifp;
+	name = d->bd_bif->bif_name;
 	n1 = 0;
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		if (bp->bif_ifp == ifp)
+		if (bp->bif_name == name)
 			n1++;
 	}
 	if (bfl->bfl_list == NULL) {
@@ -2909,7 +2696,7 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 	lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
 	n = 0;
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		if (bp->bif_ifp != ifp)
+		if (bp->bif_name != name)
 			continue;
 		lst[n++] = bp->bif_dlt;
 	}
@@ -2920,13 +2707,20 @@ bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 }
 
 /*
- * Set the data link type of a BPF instance.
+ * Set the data link type of a BPF descriptor.  The convention is that
+ * application first do BIOCSETIF and then BIOCSETDLT, thus the descriptor
+ * is supposed to be already attached.  Only one kernel facility provides
+ * tapping points with same name but different DLT - ieee80211_radiotap.
+ *
+ * XXXGL: this function definitely looks suspicious, e.g. it clearly doesn't
+ * clear promisc on the old bpf_if.  The convention about reference counting
+ * is also unclear.
  */
 static int
 bpf_setdlt(struct bpf_d *d, u_int dlt)
 {
 	int error, opromisc;
-	struct ifnet *ifp;
+	const char *name;
 	struct bpf_if *bp;
 
 	BPF_LOCK_ASSERT();
@@ -2939,21 +2733,22 @@ bpf_setdlt(struct bpf_d *d, u_int dlt)
 	if (d->bd_bif->bif_dlt == dlt)
 		return (0);
 
-	ifp = d->bd_bif->bif_ifp;
+	name = d->bd_bif->bif_name;
 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
+		if (bp->bif_name == name && bp->bif_dlt == dlt)
 			break;
 	}
 	if (bp == NULL)
 		return (EINVAL);
 
 	opromisc = d->bd_promisc;
+	bpf_detachd(d, false);
 	bpf_attachd(d, bp);
 	if (opromisc) {
-		error = ifpromisc(bp->bif_ifp, 1);
+		error = bp->bif_methods->bif_promisc(bp->bif_softc, true);
 		if (error)
-			if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n",
-			    __func__, error);
+			printf("%s: bif_promisc on %s failed (%d)\n",
+			    __func__, bp->bif_name, error);
 		else
 			d->bd_promisc = 1;
 	}
@@ -3024,8 +2819,7 @@ bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 	d->bd_hlen = bd->bd_hlen;
 	d->bd_bufsize = bd->bd_bufsize;
 	d->bd_pid = bd->bd_pid;
-	strlcpy(d->bd_ifname,
-	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
+	strlcpy(d->bd_ifname, bd->bd_bif->bif_name, sizeof(d->bd_ifname));
 	d->bd_locked = bd->bd_flags & BPFD_LOCKED ? 1 : 0;
 	d->bd_wcount = counter_u64_fetch(bd->bd_wcount);
 	d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount);
@@ -3154,15 +2948,9 @@ bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m)
 void
 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
 {
+	static const struct bpfd_list dead_bpf_if = CK_LIST_HEAD_INITIALIZER();
 
-	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
-}
-
-void
-bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
-{
-
-	*driverp = __DECONST(struct bpf_if *, &dead_bpf_if);
+	ifp->if_bpf = __DECONST(struct bpf_if *, &dead_bpf_if);
 }
 
 void
diff --git a/sys/net/bpf.h b/sys/net/bpf.h
index b18840bfd2bf..cda03c06d51d 100644
--- a/sys/net/bpf.h
+++ b/sys/net/bpf.h
@@ -407,9 +407,14 @@ SYSCTL_DECL(_net_bpf);
  * Part of this structure is exposed to external callers to speed up
  * bpf_peers_present() calls.
  */
+struct mbuf;
 struct bpf_if;
+struct bif_methods;
 CK_LIST_HEAD(bpfd_list, bpf_d);
 
+struct bpf_if *	bpf_attach(const char *, u_int, u_int,
+	    const struct bif_methods *, void *);
+void	bpf_detach(struct bpf_if *);
 void	bpf_bufheld(struct bpf_d *d);
 int	bpf_validate(const struct bpf_insn *, int);
 void	bpf_tap(struct bpf_if *, u_char *, u_int);
@@ -419,7 +424,6 @@ void	bpf_mtap_if(struct ifnet *, struct mbuf *);
 void	bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *);
 void	bpf_mtap2_if(struct ifnet *, void *, u_int, struct mbuf *);
 void	bpfattach(struct ifnet *, u_int, u_int);
-void	bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **);
 void	bpfdetach(struct ifnet *);
 bool	bpf_peers_present_if(struct ifnet *);
 #ifdef VIMAGE
@@ -444,6 +448,28 @@ bpf_peers_present(const struct bpf_if *bpf)
 	bpf_mtap_if((_ifp), (_m))
 #define BPF_MTAP2(_ifp, _data, _dlen, _m) 			\
 	bpf_mtap2_if((_ifp), (_data), (_dlen), (_m))
+
+typedef void		bif_attachd_t(void *);
+typedef void		bif_detachd_t(void *);
+typedef bool		bif_chkdir_t(void *, const struct mbuf *, int);
+typedef int		bif_write_t(void *, struct mbuf *, struct mbuf *, int);
+typedef uint32_t	bif_wrsize_t(void *);
+typedef int		bif_promisc_t(void *, bool);
+typedef int		bif_mac_check_receive_t(void *, struct bpf_d *);
+struct bif_methods {
+	bif_attachd_t	*bif_attachd;
+	bif_detachd_t	*bif_detachd;
+	bif_chkdir_t	*bif_chkdir;
+	bif_promisc_t	*bif_promisc;
+	/* Writable taps shall implement the below methods. */
+	bif_write_t	*bif_write;
+	bif_wrsize_t	*bif_wrsize;
+	bif_mac_check_receive_t *bif_mac_check_receive;
+};
+
+/* Ifnet methods implemented in bpf_ifnet.c are shared with net80211. */
+extern bif_wrsize_t bpf_ifnet_wrsize;
+extern bif_promisc_t bpf_ifnet_promisc;
 #endif /* _KERNEL */
 
 /*
diff --git a/sys/net/bpf_ifnet.c b/sys/net/bpf_ifnet.c
new file mode 100644
index 000000000000..d4fd50bd3d8e
--- /dev/null
+++ b/sys/net/bpf_ifnet.c
@@ -0,0 +1,257 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
+ * Copyright (c) 2025 Gleb Smirnoff <glebius@FreeBSD.org>
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_private.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+/* We need to know all the ifnets we support. */
+#include <net/if_dl.h>
+#include <net/ethernet.h>
+#include <net/firewire.h>
+#include <net/if_pflog.h>
+#include <net/if_pfsync.h>
+
+#include <security/mac/mac_framework.h>
+
+static int
+bpf_ifnet_write(void *arg, struct mbuf *m, struct mbuf *mc, int flags)
+{
+	struct ifnet *ifp = arg;
+	struct route ro = {};
+	struct sockaddr dst = {
+		.sa_family = AF_UNSPEC,
+	};
+	u_int hlen;
+	int error;
+
+	NET_EPOCH_ASSERT();
+
+	if (__predict_false((ifp->if_flags & IFF_UP) == 0)) {
+		m_freem(m);
+		m_freem(mc);
+		return (ENETDOWN);
+	}
+
+	switch (ifp->if_type) {
+	/* DLT_RAW */
+	case IFT_MBIM:		/* umb(4) */
+	case IFT_OTHER:		/* uhso(4), usie */
+		hlen = 0;
+		break;
+
+	/* DLT_ENC */
+	case IFT_ENC:
+		hlen = 12; /* XXXGL: sizeof(struct enchdr); */
+		break;
+
+	/* DLT_EN10MB */
+	case IFT_ETHER:		/* if_ethersubr.c */
+	case IFT_L2VLAN:	/* vlan(4) */
+	case IFT_IEEE8023ADLAG:	/* lagg(4) */
+	case IFT_INFINIBAND:	/* if_infiniband.c */
+	{
+		struct ether_header *eh;
+
+		eh = mtod(m, struct ether_header *);
+		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
+			    ETHER_ADDR_LEN) == 0)
+				m->m_flags |= M_BCAST;
+			else
+				m->m_flags |= M_MCAST;
+		}
+		if (!(flags & BPFD_HDRCMPLT)) {
+			memcpy(eh->ether_shost, IF_LLADDR(ifp),
+			    sizeof(eh->ether_shost));
+		}
+		hlen = ETHER_HDR_LEN;
+		break;
+	}
+	/* DLT_APPLE_IP_OVER_IEEE1394 */
+	case IFT_IEEE1394:	/* fwip(4) */
+		hlen = sizeof(struct fw_hwaddr);
+		break;
+
+	/* DLT_NULL */
+	case IFT_GIF:		/* gif(4) */
+	case IFT_LOOP:		/* lo(4), disc(4) */
+	case IFT_PARA:		/* plip(4), iic */
+	case IFT_PPP:		/* tun(4) */
+	case IFT_PROPVIRTUAL:	/* ng_iface(4) */
+	case IFT_WIREGUARD:	/* wg(4) */
+	case IFT_STF:		/* stf(4) */
+	case IFT_TUNNEL:	/* ipsec(4), me(4), gre(4), ovpn(4) */
+		hlen = sizeof(uint32_t);
+		break;
+
+	/* DLT_PFLOG */
+	case IFT_PFLOG:
+		hlen = PFLOG_HDRLEN;
+		break;
+
+	/* DLT_PFSYNC */
+	case IFT_PFSYNC:
+		hlen = PFSYNC_HDRLEN;
+		break;
+
+	default:
+		hlen = 0;	/* pacify compiler */
+		KASSERT(0, ("%s: ifp %p type %u not supported", __func__,
+		    ifp, ifp->if_type));
+	}
+
+	if (__predict_false(hlen > m->m_len)) {
+		m_freem(m);
+		m_freem(mc);
+		return (EMSGSIZE);
+	};
+
+	if (hlen != 0) {
+		bcopy(mtod(m, const void *), &dst.sa_data, hlen);
+		ro.ro_prepend = (char *)&dst.sa_data;
+		ro.ro_plen = hlen;
+		ro.ro_flags = RT_HAS_HEADER;
+		m->m_pkthdr.len -= hlen;
+		m->m_len -= hlen;
+		m->m_data += hlen;
+	};
+
+	CURVNET_SET(ifp->if_vnet);
+	error = ifp->if_output(ifp, m, &dst, &ro);
+	if (error != 0) {
+		m_freem(mc);
+	} else if (mc != NULL) {
+		mc->m_pkthdr.rcvif = ifp;
+		(void)ifp->if_input(ifp, mc);
+	}
+	CURVNET_RESTORE();
+
+	return (error);
+}
+
+static bool
+bpf_ifnet_chkdir(void *arg, const struct mbuf *m, int dir)
+{
+	struct ifnet *ifp = arg;
+	struct ifnet *rcvif = m_rcvif(m);
+
+	return ((dir == BPF_D_IN && ifp != rcvif) ||
+	    (dir == BPF_D_OUT && ifp == rcvif));
+}
+
+uint32_t
+bpf_ifnet_wrsize(void *arg)
+{
+	struct ifnet *ifp = arg;
+
+	return (ifp->if_mtu);
+}
+
+int
+bpf_ifnet_promisc(void *arg, bool on)
+{
+	struct ifnet *ifp = arg;
+	int error;
+
+	CURVNET_SET(ifp->if_vnet);
+	if ((error = ifpromisc(ifp, on ? 1 : 0)) != 0)
+		if_printf(ifp, "%s: ifpromisc failed (%d)\n", __func__, error);
+	CURVNET_RESTORE();
+
+	return (error);
+}
+
+#ifdef MAC
+static int
+bpf_ifnet_mac_check_receive(void *arg, struct bpf_d *d)
+{
+	struct ifnet *ifp = arg;
+
+	return (mac_bpfdesc_check_receive(d, ifp));
+}
+#endif
+
+static const struct bif_methods bpf_ifnet_methods = {
+	.bif_chkdir = bpf_ifnet_chkdir,
+	.bif_promisc = bpf_ifnet_promisc,
+	.bif_wrsize = bpf_ifnet_wrsize,
+	.bif_write = bpf_ifnet_write,
+#ifdef MAC
+	.bif_mac_check_receive = bpf_ifnet_mac_check_receive,
+#endif
+};
+
+/*
+ * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
+ * fixed size of the link header (variable length headers not yet supported).
+ * Legacy KPI to be obsoleted soon.
+ */
+void
+bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
+{
+
+	ifp->if_bpf = bpf_attach(ifp->if_xname, dlt, hdrlen,
+	    &bpf_ifnet_methods, ifp);
+	if_ref(ifp);
+	if (bootverbose && IS_DEFAULT_VNET(curvnet))
+		if_printf(ifp, "bpf attached\n");
+}
+
+/*
+ * The dead_bpf_if is an ugly plug against races at ifnet destroy time that
+ * still exist and are not properly covered by epoch(9).
+ * Legacy KPI to be obsoleted soon.
+ */
+void
+bpfdetach(struct ifnet *ifp)
+{
+	static const struct bpfd_list dead_bpf_if = CK_LIST_HEAD_INITIALIZER();
+	struct bpf_if *bif;
+
+	bif = ifp->if_bpf;
+	ifp->if_bpf = __DECONST(struct bpf_if *, &dead_bpf_if);
+	bpf_detach(bif);
+	if_rele(ifp);
+}
diff --git a/sys/net80211/ieee80211_freebsd.c b/sys/net80211/ieee80211_freebsd.c
index e01d0bf20f5e..b1380eb8a81c 100644
--- a/sys/net80211/ieee80211_freebsd.c
+++ b/sys/net80211/ieee80211_freebsd.c
@@ -1088,35 +1088,7 @@ ieee80211_load_module(const char *modname)
 #endif
 }
 
-static eventhandler_tag wlan_bpfevent;
 static eventhandler_tag wlan_ifllevent;
-
-static void
-bpf_track(void *arg, struct ifnet *ifp, int dlt, int attach)
-{
-	/* NB: identify vap's by if_init */
-	if (dlt == DLT_IEEE802_11_RADIO &&
-	    ifp->if_init == ieee80211_init) {
-		struct ieee80211vap *vap = ifp->if_softc;
-		/*
-		 * Track bpf radiotap listener state.  We mark the vap
-		 * to indicate if any listener is present and the com
-		 * to indicate if any listener exists on any associated
-		 * vap.  This flag is used by drivers to prepare radiotap
-		 * state only when needed.
-		 */
-		if (attach) {
-			ieee80211_syncflag_ext(vap, IEEE80211_FEXT_BPF);
-			if (vap->iv_opmode == IEEE80211_M_MONITOR)
-				atomic_add_int(&vap->iv_ic->ic_montaps, 1);
-		} else if (!bpf_peers_present(vap->iv_rawbpf)) {
-			ieee80211_syncflag_ext(vap, -IEEE80211_FEXT_BPF);
-			if (vap->iv_opmode == IEEE80211_M_MONITOR)
-				atomic_subtract_int(&vap->iv_ic->ic_montaps, 1);
-		}
-	}
-}
-
 /*
  * Change MAC address on the vap (if was not started).
  */
@@ -1385,8 +1357,6 @@ wlan_modevent(module_t mod, int type, void *unused)
 	case MOD_LOAD:
 		if (bootverbose)
 			printf("wlan: <802.11 Link Layer>\n");
-		wlan_bpfevent = EVENTHANDLER_REGISTER(bpf_track,
-		    bpf_track, 0, EVENTHANDLER_PRI_ANY);
 		wlan_ifllevent = EVENTHANDLER_REGISTER(iflladdr_event,
 		    wlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
 		struct if_clone_addreq req = {
@@ -1398,7 +1368,6 @@ wlan_modevent(module_t mod, int type, void *unused)
 		return 0;
 	case MOD_UNLOAD:
 		ifc_detach_cloner(wlan_cloner);
-		EVENTHANDLER_DEREGISTER(bpf_track, wlan_bpfevent);
 		EVENTHANDLER_DEREGISTER(iflladdr_event, wlan_ifllevent);
 		return 0;
 	}
diff --git a/sys/net80211/ieee80211_radiotap.c b/sys/net80211/ieee80211_radiotap.c
index e79aba550f50..4d36be6df9f5 100644
--- a/sys/net80211/ieee80211_radiotap.c
+++ b/sys/net80211/ieee80211_radiotap.c
@@ -43,8 +43,10 @@
 #include <net/bpf.h>
 #include <net/if.h>
 #include <net/if_var.h>
+#include <net/if_private.h>
 #include <net/if_media.h>
 #include <net/ethernet.h>
+#include <net/route.h>
 
 #include <net80211/ieee80211_var.h>
 
@@ -108,20 +110,6 @@ ieee80211_radiotap_detach(struct ieee80211com *ic)
 {
 }
 
-void
-ieee80211_radiotap_vattach(struct ieee80211vap *vap)
-{
-	struct ieee80211com *ic = vap->iv_ic;
-	struct ieee80211_radiotap_header *th = ic->ic_th;
-
-	if (th != NULL && ic->ic_rh != NULL) {
-		/* radiotap DLT for raw 802.11 frames */
-		bpfattach2(vap->iv_ifp, DLT_IEEE802_11_RADIO,
-		    sizeof(struct ieee80211_frame) + le16toh(th->it_len),
-		    &vap->iv_rawbpf);
-	}
-}
-
 void
 ieee80211_radiotap_vdetach(struct ieee80211vap *vap)
 {
@@ -372,3 +360,115 @@ radiotap_offset(struct ieee80211_radiotap_header *rh,
 	}
 	return -1;
 }
+
+static bool
+bpf_ieee80211_chkdir(void *arg, const struct mbuf *m, int dir)
+{
+	struct ifnet *ifp = arg;
+	struct ifnet *rcvif = m_rcvif(m);
+
+	return ((dir == BPF_D_IN && ifp != rcvif) ||
+	    (dir == BPF_D_OUT && ifp == rcvif));
+}
+
+static void
+bpf_ieee80211_attach(void *sc)
+{
+	struct ieee80211vap *vap = if_getsoftc((if_t)sc);
+
+	/*
+	 * Track bpf radiotap listener state.  We mark the vap
+	 * to indicate if any listener is present and the com
+	 * to indicate if any listener exists on any associated
+	 * vap.  This flag is used by drivers to prepare radiotap
+	 * state only when needed.
+	 */
+	ieee80211_syncflag_ext(vap, IEEE80211_FEXT_BPF);
+	if (vap->iv_opmode == IEEE80211_M_MONITOR)
+		atomic_add_int(&vap->iv_ic->ic_montaps, 1);
+}
+
+static void
+bpf_ieee80211_detach(void *sc)
+{
+	struct ieee80211vap *vap = if_getsoftc((if_t)sc);
+
+	if (!bpf_peers_present(vap->iv_rawbpf)) {
+		ieee80211_syncflag_ext(vap, -IEEE80211_FEXT_BPF);
+		if (vap->iv_opmode == IEEE80211_M_MONITOR)
+			atomic_subtract_int(&vap->iv_ic->ic_montaps, 1);
+	}
+}
+
+static int
+bpf_ieee80211_write(void *arg, struct mbuf *m, struct mbuf *mc, int flags)
+{
+	struct ifnet *ifp = arg;
+	struct sockaddr dst = {
+		.sa_family = AF_IEEE80211,
+		/* XXXGL: value and XXX comment from 246b5467621a */
+		.sa_len = 12,	/* XXX != 0 */
+	};
+	struct route ro = {
+		.ro_prepend = (char *)&dst.sa_data,
+		.ro_flags = RT_HAS_HEADER,
+	};
+	const struct ieee80211_bpf_params *p;
+	u_int hlen;
+	int error;
+
+	NET_EPOCH_ASSERT();
+
+	/*
+	 * Collect true length from the parameter header.
+	 * XXX check ibp_vers
+	 */
+	p = mtod(m, const struct ieee80211_bpf_params *);
+	if (p->ibp_len > sizeof(dst.sa_data)) {
+		m_freem(m);
+		m_freem(mc);
+		return (EMSGSIZE);
+	}
+	hlen = ro.ro_plen = p->ibp_len;
+	bcopy(mtod(m, const void *), &dst.sa_data, hlen);
+	m->m_pkthdr.len -= hlen;
+	m->m_len -= hlen;
+	m->m_data += hlen;
+
+	CURVNET_SET(ifp->if_vnet);
+	error = ifp->if_output(ifp, m, &dst, &ro);
+	if (error != 0) {
+		m_freem(mc);
+	} else if (mc != NULL) {
+		mc->m_pkthdr.rcvif = ifp;
+		(void)ifp->if_input(ifp, mc);
+	}
+	CURVNET_RESTORE();
+
+	return (error);
+}
+
+static const struct bif_methods bpf_ieee80211_methods = {
+	.bif_chkdir = bpf_ieee80211_chkdir,
+	.bif_attachd = bpf_ieee80211_attach,
+	.bif_detachd = bpf_ieee80211_detach,
+	.bif_promisc = bpf_ifnet_promisc,
+	.bif_wrsize = bpf_ifnet_wrsize,
+	.bif_write = bpf_ieee80211_write,
+};
+
+void
+ieee80211_radiotap_vattach(struct ieee80211vap *vap)
+{
+	struct ieee80211com *ic = vap->iv_ic;
+	struct ieee80211_radiotap_header *th = ic->ic_th;
+
+	if (th != NULL && ic->ic_rh != NULL) {
+		/* radiotap DLT for raw 802.11 frames */
+		vap->iv_rawbpf = bpf_attach(if_name(vap->iv_ifp),
+		    DLT_IEEE802_11_RADIO,
+		    sizeof(struct ieee80211_frame) + le16toh(th->it_len),
+		    &bpf_ieee80211_methods, vap->iv_ifp);
+		if_ref(vap->iv_ifp);
+	}
+}