bhyve: Move the slirp backend out into a separate process

The previous implementation supported hostfwd rules, which allow the
host to connect to the guest via a NATed TCP connection.  libslirp
also permits NAT in the other direction, but this was prevented by
bhyve's Capsicum sandbox.

To make the slirp backend more useful, split the backend out into a
separate process which does not enter capability mode if outbound
connections are permitted (enabled by setting the new "open" keyword).
The process communicates with the bhyve network frontend (typically a
virtio network interface) using a unix SOCK_SEQPACKET socket pair.  If
the bhyve process exits, the helper will automatically exit.

Aside from this restructuring, there is not much actual change.  Many
slirp parameters are still hard-coded for now, though this may change.
The "restricted" feature is toggled by the new "open" keyword; in
particular, the backend is restricted by default for compatibility with
15.0 and 14.3.

Each packet now has to traverse an extra socket, but this overhead
should be acceptable given that the slirp backend cannot be said to
provide high-performance networking.  With iperf3 I can get 4Gbps from
the guest to the host on a Zen 4 system.

MFC after:	1 month
Sponsored by:	CHERI Research Centre (EPSRC grant UKRI3001)
Differential Revision:	https://reviews.freebsd.org/D53454
This commit is contained in:
Mark Johnston 2025-11-19 16:02:21 +00:00
parent e22cc773f1
commit 0e62ebd201
6 changed files with 679 additions and 541 deletions

View file

@ -77,6 +77,8 @@ SRCS+= snapshot.c
.include "${MACHINE_CPUARCH}/Makefile.inc"
SUBDIR+= slirp
.if defined(BHYVE_FDT_SUPPORT)
LIBADD+= fdt
CFLAGS+= -I${SRCTOP}/sys/contrib/libfdt

View file

@ -532,7 +532,9 @@ considered unconnected.
.Xc
.It
.Xo
.Cm slirp,hostfwd= Ar proto : Ar hostaddr : Ar hostport - Ar guestaddr : Ar guestport
.Cm slirp
.Op Cm \&,open
.Op Cm \&,hostfwd= Ar proto : Ar hostaddr : Ar hostport - Ar guestaddr : Ar guestport
.Xc
.El
.Sm on
@ -598,6 +600,12 @@ configuration on the host system.
It depends on the
.Pa net/libslirp
port.
If the
.Cm open
keyword is set, the guest will be able to make outbound network
connections, and
.Nm
will transparently handle the necessary address translation.
The
.Cm hostfwd
option takes a 5-tuple describing how connections from the host are to be
@ -605,6 +613,7 @@ forwarded to the guest.
Multiple rules can be specified, separated by semicolons.
Note that semicolons must be escaped or quoted to prevent the shell from
interpreting them.
The backend will provide DHCP and DNS service to the guest.
.Ss Block storage device backends:
.Bl -bullet
.Sm off

View file

@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023 Mark Johnston <markj@FreeBSD.org>
* Copyright (c) 2023, 2025 Mark Johnston <markj@FreeBSD.org>
*
* This software was developed by the University of Cambridge Computer
* Laboratory (Department of Computer Science and Technology) under Innovate
@ -31,13 +31,15 @@
*/
/*
* The slirp backend enables unprivileged networking via libslirp, which must be
* installed on the host system via pkg or the ports tree. bhyve dlopen()s
* libslirp.so upon instantiating the slirp backend. Various network parameters
* are hard-coded in _slirp_init().
* The slirp backend enables unprivileged userspace networking via libslirp,
* which must be installed on the host system via pkg or the ports tree.
* libslirp.so is dlopen()ed into a helper process with which this backend
* communicates.
*
* Packets received from the guest (i.e., transmitted by the frontend, such as a
* virtio NIC device model) are injected into the slirp backend via slirp_send().
* virtio NIC device model) are injected into the slirp backend via slirp_send(),
* which sends the packet to the helper process.
*
* Packets to be transmitted to the guest (i.e., inserted into the frontend's
* receive buffers) are buffered in a per-interface socket pair and read by the
* mevent loop. Sockets instantiated by libslirp are monitored by a thread
@ -46,14 +48,12 @@
*/
#include <sys/socket.h>
#include <sys/wait.h>
#include <assert.h>
#include <capsicum_helpers.h>
#include <dlfcn.h>
#include <errno.h>
#include <poll.h>
#include <pthread.h>
#include <pthread_np.h>
#include <signal.h>
#include <spawn.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -61,537 +61,92 @@
#include "config.h"
#include "debug.h"
#include "libslirp.h"
#include "mevent.h"
#include "net_backends.h"
#include "net_backends_priv.h"
typedef int (*slirp_add_hostxfwd_p_t)(Slirp *,
const struct sockaddr *, socklen_t, const struct sockaddr *, socklen_t,
int);
typedef void (*slirp_cleanup_p_t)(Slirp *);
typedef void (*slirp_input_p_t)(Slirp *, const uint8_t *, int);
typedef Slirp *(*slirp_new_p_t)(const SlirpConfig *, const SlirpCb *, void *);
typedef void (*slirp_pollfds_fill_p_t)(Slirp *, uint32_t *timeout,
SlirpAddPollCb, void *);
typedef void (*slirp_pollfds_poll_p_t)(Slirp *, int, SlirpGetREventsCb, void *);
#define SLIRP_MTU 2048
/* Function pointer table, initialized by slirp_init_once(). */
static slirp_add_hostxfwd_p_t slirp_add_hostxfwd_p;
static slirp_cleanup_p_t slirp_cleanup_p;
static slirp_input_p_t slirp_input_p;
static slirp_new_p_t slirp_new_p;
static slirp_pollfds_fill_p_t slirp_pollfds_fill_p;
static slirp_pollfds_poll_p_t slirp_pollfds_poll_p;
static void
checked_close(int *fdp)
{
int error;
if (*fdp != -1) {
error = close(*fdp);
assert(error == 0);
*fdp = -1;
}
}
struct slirp_priv {
int s;
pid_t helper;
struct mevent *mevp;
};
static int
slirp_init_once(void)
slirp_init(struct net_backend *be, const char *devname __unused,
nvlist_t *nvl, net_be_rxeof_t cb, void *param)
{
static void *handle = NULL;
extern char **environ;
struct slirp_priv *priv = NET_BE_PRIV(be);
nvlist_t *config;
posix_spawn_file_actions_t fa;
pid_t child;
const char **argv;
char sockname[32];
int error, s[2];
if (handle != NULL)
return (0);
handle = dlopen("libslirp.so.0", RTLD_LAZY);
if (handle == NULL) {
EPRINTLN("Unable to open libslirp.so.0: %s", dlerror());
if (socketpair(PF_LOCAL, SOCK_SEQPACKET | SOCK_NONBLOCK, 0, s) != 0) {
EPRINTLN("socketpair");
return (-1);
}
#define IMPORT_SYM(sym) do { \
sym##_p = (sym##_p_t)dlsym(handle, #sym); \
if (sym##_p == NULL) { \
EPRINTLN("failed to resolve %s", #sym); \
goto err; \
} \
} while (0)
IMPORT_SYM(slirp_add_hostxfwd);
IMPORT_SYM(slirp_cleanup);
IMPORT_SYM(slirp_input);
IMPORT_SYM(slirp_new);
IMPORT_SYM(slirp_pollfds_fill);
IMPORT_SYM(slirp_pollfds_poll);
#undef IMPORT_SYM
/*
* libslirp uses glib, which uses tzdata to format log messages. Help
* it out.
*
* XXX-MJ glib will also look for charset files, not sure what we can do
* about that...
* The child will exit once its connection goes away, so make sure only
* one end is inherited by the child.
*/
caph_cache_tzdata();
return (0);
err:
dlclose(handle);
handle = NULL;
return (-1);
}
struct slirp_priv {
Slirp *slirp;
#define SLIRP_MTU 2048
struct mevent *mevp;
int pipe[2]; /* used to buffer data sent to the guest */
int wakeup[2]; /* used to wake up the pollfd thread */
pthread_t pollfd_td;
struct pollfd *pollfds;
size_t npollfds;
/* Serializes libslirp calls. */
pthread_mutex_t mtx;
};
static void
slirp_priv_init(struct slirp_priv *priv)
{
int error;
memset(priv, 0, sizeof(*priv));
priv->pipe[0] = priv->pipe[1] = -1;
priv->wakeup[0] = priv->wakeup[1] = -1;
error = pthread_mutex_init(&priv->mtx, NULL);
assert(error == 0);
}
static void
slirp_priv_cleanup(struct slirp_priv *priv)
{
int error;
checked_close(&priv->pipe[0]);
checked_close(&priv->pipe[1]);
checked_close(&priv->wakeup[0]);
checked_close(&priv->wakeup[1]);
if (priv->mevp)
mevent_delete(priv->mevp);
if (priv->slirp != NULL)
slirp_cleanup_p(priv->slirp);
error = pthread_mutex_destroy(&priv->mtx);
assert(error == 0);
}
static int64_t
slirp_cb_clock_get_ns(void *param __unused)
{
struct timespec ts;
int error;
error = clock_gettime(CLOCK_MONOTONIC, &ts);
assert(error == 0);
return ((int64_t)(ts.tv_sec * 1000000000L + ts.tv_nsec));
}
static void
slirp_cb_notify(void *param)
{
struct slirp_priv *priv;
/* Wake up the poll thread. We assume that priv->mtx is held here. */
priv = param;
(void)write(priv->wakeup[1], "M", 1);
}
static void
slirp_cb_register_poll_fd(int fd, void *param __unused)
{
const int one = 1;
(void)setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int));
}
static ssize_t
slirp_cb_send_packet(const void *buf, size_t len, void *param)
{
struct slirp_priv *priv;
ssize_t n;
priv = param;
assert(len <= SLIRP_MTU);
n = send(priv->pipe[1], buf, len, 0);
if (n < 0) {
EPRINTLN("slirp_cb_send_packet: send: %s", strerror(errno));
return (n);
if (posix_spawn_file_actions_init(&fa) != 0) {
EPRINTLN("posix_spawn_file_actions_init");
goto err;
}
assert((size_t)n == len);
return (n);
}
static void
slirp_cb_unregister_poll_fd(int fd __unused, void *opaque __unused)
{
}
/* Callbacks invoked from within libslirp. */
static const struct SlirpCb slirp_cbs = {
.clock_get_ns = slirp_cb_clock_get_ns,
.notify = slirp_cb_notify,
.register_poll_fd = slirp_cb_register_poll_fd,
.send_packet = slirp_cb_send_packet,
.unregister_poll_fd = slirp_cb_unregister_poll_fd,
};
static int
slirpev2pollev(int events)
{
int ret;
ret = 0;
if (events & SLIRP_POLL_IN)
ret |= POLLIN;
if (events & SLIRP_POLL_OUT)
ret |= POLLOUT;
if (events & SLIRP_POLL_PRI)
ret |= POLLPRI;
if (events & SLIRP_POLL_ERR)
ret |= POLLERR;
if (events & SLIRP_POLL_HUP)
ret |= POLLHUP;
return (ret);
}
static int
pollev2slirpev(int events)
{
int ret;
ret = 0;
if (events & POLLIN)
ret |= SLIRP_POLL_IN;
if (events & POLLOUT)
ret |= SLIRP_POLL_OUT;
if (events & POLLPRI)
ret |= SLIRP_POLL_PRI;
if (events & POLLERR)
ret |= SLIRP_POLL_ERR;
if (events & POLLHUP)
ret |= SLIRP_POLL_HUP;
return (ret);
}
static int
slirp_addpoll_cb(int fd, int events, void *param)
{
struct slirp_priv *priv;
struct pollfd *pollfd, *pollfds;
size_t i;
priv = param;
for (i = 0; i < priv->npollfds; i++)
if (priv->pollfds[i].fd == -1)
break;
if (i == priv->npollfds) {
const size_t POLLFD_GROW = 4;
priv->npollfds += POLLFD_GROW;
pollfds = realloc(priv->pollfds,
sizeof(*pollfds) * priv->npollfds);
if (pollfds == NULL)
return (-1);
for (i = priv->npollfds - POLLFD_GROW; i < priv->npollfds; i++)
pollfds[i].fd = -1;
priv->pollfds = pollfds;
i = priv->npollfds - POLLFD_GROW;
}
pollfd = &priv->pollfds[i];
pollfd->fd = fd;
pollfd->events = slirpev2pollev(events);
pollfd->revents = 0;
return ((int)i);
}
static int
slirp_poll_revents(int idx, void *param)
{
struct slirp_priv *priv;
struct pollfd *pollfd;
short revents;
priv = param;
assert(idx >= 0);
assert((unsigned int)idx < priv->npollfds);
pollfd = &priv->pollfds[idx];
assert(pollfd->fd != -1);
/* The kernel may report POLLHUP even if we didn't ask for it. */
revents = pollfd->revents;
if ((pollfd->events & POLLHUP) == 0)
revents &= ~POLLHUP;
return (pollev2slirpev(revents));
}
static void *
slirp_pollfd_td_loop(void *param)
{
struct slirp_priv *priv;
struct pollfd *pollfds;
size_t npollfds;
uint32_t timeout;
int error;
pthread_set_name_np(pthread_self(), "slirp pollfd");
priv = param;
pthread_mutex_lock(&priv->mtx);
for (;;) {
int wakeup;
for (size_t i = 0; i < priv->npollfds; i++)
priv->pollfds[i].fd = -1;
/* Register for notifications from slirp_cb_notify(). */
wakeup = slirp_addpoll_cb(priv->wakeup[0], POLLIN, priv);
timeout = UINT32_MAX;
slirp_pollfds_fill_p(priv->slirp, &timeout, slirp_addpoll_cb,
priv);
pollfds = priv->pollfds;
npollfds = priv->npollfds;
pthread_mutex_unlock(&priv->mtx);
error = poll(pollfds, npollfds, timeout);
if (error == -1 && errno != EINTR) {
EPRINTLN("poll: %s", strerror(errno));
exit(1);
}
pthread_mutex_lock(&priv->mtx);
slirp_pollfds_poll_p(priv->slirp, error == -1,
slirp_poll_revents, priv);
/*
* If we were woken up by the notify callback, mask the
* interrupt.
*/
if ((pollfds[wakeup].revents & POLLIN) != 0) {
ssize_t n;
do {
uint8_t b;
n = read(priv->wakeup[0], &b, 1);
} while (n == 1);
if (n != -1 || errno != EAGAIN) {
EPRINTLN("read(wakeup): %s", strerror(errno));
exit(1);
}
}
}
}
static int
parse_addr(char *addr, struct sockaddr_in *sinp)
{
char *port;
int error, porti;
memset(sinp, 0, sizeof(*sinp));
sinp->sin_family = AF_INET;
sinp->sin_len = sizeof(struct sockaddr_in);
port = strchr(addr, ':');
if (port == NULL)
return (EINVAL);
*port++ = '\0';
if (strlen(addr) > 0) {
error = inet_pton(AF_INET, addr, &sinp->sin_addr);
if (error != 1)
return (error == 0 ? EPFNOSUPPORT : errno);
} else {
sinp->sin_addr.s_addr = htonl(INADDR_ANY);
if (posix_spawn_file_actions_addclose(&fa, s[0]) != 0) {
EPRINTLN("posix_spawn_file_actions_addclose");
posix_spawn_file_actions_destroy(&fa);
goto err;
}
porti = strlen(port) > 0 ? atoi(port) : 0;
if (porti < 0 || porti > UINT16_MAX)
return (EINVAL);
sinp->sin_port = htons(porti);
return (0);
}
static int
parse_hostfwd_rule(const char *descr, int *is_udp, struct sockaddr *hostaddr,
struct sockaddr *guestaddr)
{
struct sockaddr_in *hostaddrp, *guestaddrp;
const char *proto;
char *p, *host, *guest;
int error;
error = 0;
*is_udp = 0;
p = strdup(descr);
if (p == NULL)
return (ENOMEM);
host = strchr(p, ':');
if (host == NULL) {
error = EINVAL;
goto out;
}
*host++ = '\0';
proto = p;
*is_udp = strcmp(proto, "udp") == 0;
guest = strchr(host, '-');
if (guest == NULL) {
error = EINVAL;
goto out;
}
*guest++ = '\0';
hostaddrp = (struct sockaddr_in *)hostaddr;
error = parse_addr(host, hostaddrp);
if (error != 0)
goto out;
guestaddrp = (struct sockaddr_in *)guestaddr;
error = parse_addr(guest, guestaddrp);
if (error != 0)
goto out;
out:
free(p);
return (error);
}
static int
config_one_hostfwd(struct slirp_priv *priv, const char *rule)
{
struct sockaddr hostaddr, guestaddr;
int error, is_udp;
error = parse_hostfwd_rule(rule, &is_udp, &hostaddr, &guestaddr);
if (error != 0) {
EPRINTLN("Unable to parse hostfwd rule '%s': %s",
rule, strerror(error));
return (error);
}
error = slirp_add_hostxfwd_p(priv->slirp, &hostaddr, hostaddr.sa_len,
&guestaddr, guestaddr.sa_len, is_udp ? SLIRP_HOSTFWD_UDP : 0);
if (error != 0) {
EPRINTLN("Unable to add hostfwd rule '%s': %s",
rule, strerror(errno));
return (error);
}
return (0);
}
static int
_slirp_init(struct net_backend *be, const char *devname __unused,
nvlist_t *nvl, net_be_rxeof_t cb, void *param)
{
struct slirp_priv *priv = NET_BE_PRIV(be);
SlirpConfig config = {
.version = 4,
.if_mtu = SLIRP_MTU,
.restricted = true,
.in_enabled = true,
.vnetwork.s_addr = htonl(0x0a000200), /* 10.0.2.0/24 */
.vnetmask.s_addr = htonl(0xffffff00),
.vdhcp_start.s_addr = htonl(0x0a00020f),/* 10.0.2.15 */
.vhost.s_addr = htonl(0x0a000202), /* 10.0.2.2 */
.enable_emu = false,
(void)snprintf(sockname, sizeof(sockname), "%d", s[1]);
argv = (const char *[]){
"/usr/libexec/bhyve-slirp-helper", "-S", sockname, NULL
};
const char *hostfwd;
int error, sndbuf;
error = slirp_init_once();
if (error != 0)
return (error);
slirp_priv_init(priv);
priv->slirp = slirp_new_p(&config, &slirp_cbs, priv);
if (priv->slirp == NULL) {
EPRINTLN("Unable to create slirp instance");
goto err;
}
hostfwd = get_config_value_node(nvl, "hostfwd");
if (hostfwd != NULL) {
char *rules, *tofree;
const char *rule;
tofree = rules = strdup(hostfwd);
if (rules == NULL)
goto err;
while ((rule = strsep(&rules, ";")) != NULL) {
error = config_one_hostfwd(priv, rule);
if (error != 0) {
free(tofree);
goto err;
}
}
free(tofree);
}
error = socketpair(PF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0, priv->pipe);
error = posix_spawn(&child, "/usr/libexec/bhyve-slirp-helper",
&fa, NULL, __DECONST(char **, argv), environ);
posix_spawn_file_actions_destroy(&fa);
if (error != 0) {
EPRINTLN("Unable to create pipe: %s", strerror(errno));
EPRINTLN("posix_spawn(bhyve-slirp-helper): %s",
strerror(error));
goto err;
}
error = pipe2(priv->wakeup, O_CLOEXEC | O_NONBLOCK);
config = nvlist_clone(nvl);
if (config == NULL) {
EPRINTLN("nvlist_clone");
goto err;
}
nvlist_add_string(config, "vmname", get_config_value("name"));
error = nvlist_send(s[0], config);
nvlist_destroy(config);
if (error != 0) {
EPRINTLN("Unable to create wakeup pipe: %s", strerror(errno));
EPRINTLN("nvlist_send");
goto err;
}
/*
* Try to avoid dropping buffered packets in slirp_cb_send_packet().
*/
sndbuf = 1024 * 1024;
error = setsockopt(priv->pipe[1], SOL_SOCKET, SO_SNDBUF, &sndbuf,
sizeof(sndbuf));
if (error != 0) {
EPRINTLN("Could not set socket buffer size: %s",
strerror(errno));
goto err;
}
be->fd = priv->pipe[0];
be->fd = s[0];
priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
if (priv->mevp == NULL) {
EPRINTLN("Could not register event");
goto err;
}
error = pthread_create(&priv->pollfd_td, NULL, slirp_pollfd_td_loop,
priv);
if (error != 0) {
EPRINTLN("Unable to create pollfd thread: %s", strerror(error));
goto err;
}
priv->helper = child;
priv->s = s[0];
(void)close(s[1]);
return (0);
err:
slirp_priv_cleanup(priv);
(void)close(s[0]);
(void)close(s[1]);
return (-1);
}
@ -599,52 +154,43 @@ static ssize_t
slirp_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
struct slirp_priv *priv = NET_BE_PRIV(be);
struct msghdr hdr;
if (iovcnt == 1) {
/* We can avoid copying if there's a single segment. */
pthread_mutex_lock(&priv->mtx);
slirp_input_p(priv->slirp, iov->iov_base,
(int)iov->iov_len);
pthread_mutex_unlock(&priv->mtx);
return (iov[0].iov_len);
} else {
uint8_t *pkt;
size_t pktlen;
pktlen = 0;
for (int i = 0; i < iovcnt; i++)
pktlen += iov[i].iov_len;
pkt = malloc(pktlen);
if (pkt == NULL)
return (-1);
pktlen = 0;
for (int i = 0; i < iovcnt; i++) {
memcpy(pkt + pktlen, iov[i].iov_base, iov[i].iov_len);
pktlen += iov[i].iov_len;
}
pthread_mutex_lock(&priv->mtx);
slirp_input_p(priv->slirp, pkt, (int)pktlen);
pthread_mutex_unlock(&priv->mtx);
free(pkt);
return (pktlen);
}
memset(&hdr, 0, sizeof(hdr));
hdr.msg_iov = __DECONST(struct iovec *, iov);
hdr.msg_iovlen = iovcnt;
return (sendmsg(priv->s, &hdr, MSG_EOR));
}
static void
_slirp_cleanup(struct net_backend *be)
slirp_cleanup(struct net_backend *be)
{
struct slirp_priv *priv = NET_BE_PRIV(be);
slirp_priv_cleanup(priv);
if (priv->helper > 0) {
int status;
if (kill(priv->helper, SIGKILL) != 0) {
EPRINTLN("kill(bhyve-slirp-helper): %s",
strerror(errno));
return;
}
(void)waitpid(priv->helper, &status, 0);
}
}
static ssize_t
slirp_peek_recvlen(struct net_backend *be)
{
struct slirp_priv *priv = NET_BE_PRIV(be);
uint8_t buf[SLIRP_MTU];
ssize_t n;
n = recv(priv->pipe[0], NULL, 0, MSG_PEEK | MSG_DONTWAIT | MSG_TRUNC);
/*
* Copying into the buffer is totally unnecessary, but we don't
* implement MSG_TRUNC for SEQPACKET sockets.
*/
n = recv(priv->s, buf, sizeof(buf), MSG_PEEK | MSG_DONTWAIT);
if (n < 0)
return (errno == EWOULDBLOCK ? 0 : -1);
assert((size_t)n <= SLIRP_MTU);
@ -665,7 +211,7 @@ slirp_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
hdr.msg_control = NULL;
hdr.msg_controllen = 0;
hdr.msg_flags = 0;
n = recvmsg(priv->pipe[0], &hdr, MSG_DONTWAIT);
n = recvmsg(priv->s, &hdr, MSG_DONTWAIT);
if (n < 0) {
if (errno == EWOULDBLOCK)
return (0);
@ -707,8 +253,8 @@ slirp_set_cap(struct net_backend *be __unused, uint64_t features __unused,
static struct net_backend slirp_backend = {
.prefix = "slirp",
.priv_size = sizeof(struct slirp_priv),
.init = _slirp_init,
.cleanup = _slirp_cleanup,
.init = slirp_init,
.cleanup = slirp_cleanup,
.send = slirp_send,
.peek_recvlen = slirp_peek_recvlen,
.recv = slirp_recv,

View file

@ -0,0 +1,11 @@
.PATH: ${.CURDIR}/../
PROG= bhyve-slirp-helper
SRCS= slirp-helper.c config.c
CFLAGS+= -I${.CURDIR}/../
LIBADD= nv
MAN=
BINDIR?= /usr/libexec
.include <bsd.prog.mk>

View file

@ -0,0 +1,570 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023, 2025 Mark Johnston <markj@FreeBSD.org>
*
* This software was developed by the University of Cambridge Computer
* Laboratory (Department of Computer Science and Technology) under Innovate
* UK project 105694, "Digital Security by Design (DSbD) Technology Platform
* Prototype".
*/
/*
* A helper process which lets bhyve's libslirp-based network backend work
* outside bhyve's Capsicum sandbox. We are started with a SOCK_SEQPACKET
* socket through which we pass and receive packets from the guest's frontend.
*
* At initialization time, we receive an nvlist over the socket which describes
* the desired slirp configuration.
*/
#include <sys/nv.h>
#include <sys/socket.h>
#include <assert.h>
#include <capsicum_helpers.h>
#include <dlfcn.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "config.h"
#include "libslirp.h"
#define SLIRP_MTU 2048
/*
 * State of the helper process: the libslirp instance, the socket shared with
 * the parent, and the pollfd array that the main loop refills on every
 * iteration.
 */
struct slirp_priv {
Slirp *slirp; /* libslirp handle */
int sock; /* data and control socket */
int wakeup[2]; /* pipe written by slirp_cb_notify() to wake up the poll loop */
struct pollfd *pollfds; /* array handed to poll(2) */
size_t npollfds; /* number of entries allocated in pollfds */
size_t lastpollfd; /* index of the most recently claimed entry */
};
/*
 * Signatures of the libslirp functions used by this program.  The functions
 * are looked up by name with dlsym() in libslirp_init(), so each one needs a
 * matching function pointer type named <symbol>_p_t.
 */
typedef int (*slirp_add_hostxfwd_p_t)(Slirp *,
const struct sockaddr *, socklen_t, const struct sockaddr *, socklen_t,
int);
typedef void (*slirp_cleanup_p_t)(Slirp *);
typedef void (*slirp_input_p_t)(Slirp *, const uint8_t *, int);
typedef Slirp *(*slirp_new_p_t)(const SlirpConfig *, const SlirpCb *, void *);
typedef void (*slirp_pollfds_fill_p_t)(Slirp *, uint32_t *timeout,
SlirpAddPollCb, void *);
typedef void (*slirp_pollfds_poll_p_t)(Slirp *, int, SlirpGetREventsCb, void *);
/*
 * Function pointer table, initialized by libslirp_init().  Each pointer is
 * named <symbol>_p after the libslirp symbol it refers to.
 */
static slirp_add_hostxfwd_p_t slirp_add_hostxfwd_p;
static slirp_cleanup_p_t slirp_cleanup_p;
static slirp_input_p_t slirp_input_p;
static slirp_new_p_t slirp_new_p;
static slirp_pollfds_fill_p_t slirp_pollfds_fill_p;
static slirp_pollfds_poll_p_t slirp_pollfds_poll_p;
/*
 * libslirp clock callback: return the current CLOCK_MONOTONIC reading in
 * nanoseconds.  The opaque callback argument is not used.
 */
static int64_t
slirp_cb_clock_get_ns(void *param __unused)
{
	struct timespec ts;
	int error;

	error = clock_gettime(CLOCK_MONOTONIC, &ts);
	assert(error == 0);
	(void)error;

	/*
	 * Widen the seconds before multiplying so that the arithmetic is
	 * carried out in 64 bits even where time_t or long is narrower.
	 */
	return ((int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
}
/*
 * libslirp notify callback.  Write a single byte to the wakeup pipe; the main
 * loop polls the read side of that pipe.  The result of the write is
 * deliberately ignored.
 */
static void
slirp_cb_notify(void *param)
{
	struct slirp_priv *sp = param;
	const char token = 'M';

	(void)write(sp->wakeup[1], &token, 1);
}
/*
 * libslirp callback invoked with a descriptor that libslirp wants polled.
 * Set SO_NOSIGPIPE on it; failure to do so is ignored.
 */
static void
slirp_cb_register_poll_fd(int fd, void *param __unused)
{
	int enable = 1;

	(void)setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &enable,
	    sizeof(enable));
}
/*
 * libslirp callback: forward one packet produced by libslirp to our parent
 * over the shared socket as a single record (MSG_EOR).
 *
 * Returns the number of bytes sent, or the negative result of send(2) after
 * logging a warning.
 */
static ssize_t
slirp_cb_send_packet(const void *buf, size_t len, void *param)
{
	struct slirp_priv *sp = param;
	ssize_t sent;

	assert(len <= SLIRP_MTU);

	sent = send(sp->sock, buf, len, MSG_EOR);
	if (sent >= 0) {
		/* A record is expected to go out whole. */
		assert((size_t)sent == len);
		return (sent);
	}

	warn("slirp_cb_send_packet: send");
	return (sent);
}
/*
 * libslirp callback counterpart of slirp_cb_register_poll_fd().  Nothing is
 * done here; both arguments are unused.
 */
static void
slirp_cb_unregister_poll_fd(int fd __unused, void *opaque __unused)
{
}
/*
 * Callbacks invoked from within libslirp.  main() passes this table to
 * slirp_new() along with a pointer to its struct slirp_priv; the callbacks
 * that need state treat their opaque argument as that pointer.
 */
static const struct SlirpCb slirp_cbs = {
.clock_get_ns = slirp_cb_clock_get_ns,
.notify = slirp_cb_notify,
.register_poll_fd = slirp_cb_register_poll_fd,
.send_packet = slirp_cb_send_packet,
.unregister_poll_fd = slirp_cb_unregister_poll_fd,
};
/*
 * Translate a mask of SLIRP_POLL_* flags into the equivalent poll(2) event
 * mask.  Bits without a counterpart are dropped.
 */
static int
slirpev2pollev(int events)
{
	static const struct {
		int	from;
		int	to;
	} evmap[] = {
		{ SLIRP_POLL_IN,	POLLIN },
		{ SLIRP_POLL_OUT,	POLLOUT },
		{ SLIRP_POLL_PRI,	POLLPRI },
		{ SLIRP_POLL_ERR,	POLLERR },
		{ SLIRP_POLL_HUP,	POLLHUP },
	};
	int mask = 0;

	for (size_t i = 0; i < sizeof(evmap) / sizeof(evmap[0]); i++) {
		if ((events & evmap[i].from) != 0)
			mask |= evmap[i].to;
	}
	return (mask);
}
/*
 * Translate a poll(2) event mask into the equivalent mask of SLIRP_POLL_*
 * flags.  Bits without a counterpart are dropped.
 */
static int
pollev2slirpev(int events)
{
	static const struct {
		int	from;
		int	to;
	} evmap[] = {
		{ POLLIN,	SLIRP_POLL_IN },
		{ POLLOUT,	SLIRP_POLL_OUT },
		{ POLLPRI,	SLIRP_POLL_PRI },
		{ POLLERR,	SLIRP_POLL_ERR },
		{ POLLHUP,	SLIRP_POLL_HUP },
	};
	int mask = 0;

	for (size_t i = 0; i < sizeof(evmap) / sizeof(evmap[0]); i++) {
		if ((events & evmap[i].from) != 0)
			mask |= evmap[i].to;
	}
	return (mask);
}
/*
 * Claim the next free entry of priv->pollfds for "fd", growing the array when
 * no free entry follows the last one claimed.  "events" is translated with
 * slirpev2pollev() before being stored.
 *
 * Returns the index of the entry, or -1 if the array could not be grown.  On
 * failure the existing array and its recorded size are left untouched.
 */
static int
slirp_addpoll(struct slirp_priv *priv, int fd, int events)
{
	const size_t POLLFD_GROW = 4;
	struct pollfd *pollfd, *pollfds;
	size_t i, newcount;

	/*
	 * The main loop resets lastpollfd to (size_t)-1 before refilling the
	 * array, in which case the addition wraps and the search starts at 0.
	 */
	for (i = priv->lastpollfd + 1; i < priv->npollfds; i++)
		if (priv->pollfds[i].fd == -1)
			break;
	if (i == priv->npollfds) {
		/*
		 * Only commit the new size once realloc() has succeeded;
		 * otherwise npollfds would describe entries that do not exist.
		 */
		newcount = priv->npollfds + POLLFD_GROW;
		pollfds = realloc(priv->pollfds, sizeof(*pollfds) * newcount);
		if (pollfds == NULL)
			return (-1);
		for (i = priv->npollfds; i < newcount; i++)
			pollfds[i].fd = -1;
		i = priv->npollfds;
		priv->pollfds = pollfds;
		priv->npollfds = newcount;
	}

	pollfd = &priv->pollfds[i];
	pollfd->fd = fd;
	pollfd->events = slirpev2pollev(events);
	pollfd->revents = 0;
	priv->lastpollfd = i;
	return ((int)i);
}
/*
 * Adapter with the callback signature libslirp expects: the opaque argument
 * is our struct slirp_priv, and the work is done by slirp_addpoll().
 */
static int
slirp_addpoll_cb(int fd, int events, void *param)
{
	return (slirp_addpoll((struct slirp_priv *)param, fd, events));
}
/*
 * libslirp callback: report, as SLIRP_POLL_* flags, the events that poll(2)
 * returned for the entry at index "idx" (an index previously handed out by
 * slirp_addpoll()).
 */
static int
slirp_poll_revents(int idx, void *param)
{
	struct slirp_priv *sp = param;
	const struct pollfd *pfd;
	short got;

	assert(idx >= 0);
	assert((unsigned int)idx < sp->npollfds);

	pfd = &sp->pollfds[idx];
	assert(pfd->fd != -1);

	got = pfd->revents;
	/* The kernel may report POLLHUP even if we didn't ask for it. */
	if ((pfd->events & POLLHUP) == 0)
		got &= ~POLLHUP;

	return (pollev2slirpev(got));
}
/*
 * Main loop.  Poll libslirp's descriptors plus a couple of our own: the
 * wakeup pipe written by slirp_cb_notify() and the socket shared with our
 * parent.  Packets read from the socket are fed to libslirp.
 *
 * This function does not return; the process exits when the parent closes
 * its end of the socket or when an unexpected error occurs.
 */
static void
slirp_pollfd_loop(struct slirp_priv *priv)
{
	struct pollfd *pollfds;
	size_t npollfds;
	uint32_t timeout;
	int error;

	for (;;) {
		int input, wakeup;

		for (size_t i = 0; i < priv->npollfds; i++)
			priv->pollfds[i].fd = -1;
		priv->lastpollfd = (size_t)-1;

		/*
		 * slirp_addpoll() interprets its events argument as
		 * SLIRP_POLL_* flags, so for our own descriptors claim an
		 * entry with no events and fill in the poll(2) flags directly.
		 */

		/* Register for notifications from slirp_cb_notify(). */
		wakeup = slirp_addpoll(priv, priv->wakeup[0], 0);
		if (wakeup == -1)
			errx(1, "unable to allocate poll descriptors");
		priv->pollfds[wakeup].events = POLLIN;

		/* Register for input from our parent process. */
		input = slirp_addpoll(priv, priv->sock, 0);
		if (input == -1)
			errx(1, "unable to allocate poll descriptors");
		priv->pollfds[input].events = POLLIN | POLLRDHUP;

		timeout = UINT32_MAX;
		slirp_pollfds_fill_p(priv->slirp, &timeout, slirp_addpoll_cb,
		    priv);

		/* The array may have been reallocated while being filled. */
		pollfds = priv->pollfds;
		npollfds = priv->npollfds;
		error = poll(pollfds, npollfds, timeout);
		if (error == -1 && errno != EINTR)
			err(1, "poll");
		slirp_pollfds_poll_p(priv->slirp, error == -1,
		    slirp_poll_revents, priv);

		/*
		 * If we were woken up by the notify callback, mask the
		 * interrupt.
		 */
		if ((pollfds[wakeup].revents & POLLIN) != 0) {
			ssize_t n;

			do {
				uint8_t b;

				n = read(priv->wakeup[0], &b, 1);
			} while (n == 1);
			if (n != -1 || errno != EAGAIN)
				err(1, "read");
		}

		/*
		 * If new packets arrived from our parent, feed them to
		 * libslirp.
		 */
		if ((pollfds[input].revents & (POLLHUP | POLLRDHUP)) != 0)
			errx(1, "parent process closed connection");
		if ((pollfds[input].revents & POLLIN) != 0) {
			for (;;) {
				uint8_t buf[SLIRP_MTU];
				ssize_t n;

				n = recv(priv->sock, buf, sizeof(buf),
				    MSG_DONTWAIT);
				if (n < 0) {
					if (errno == EWOULDBLOCK)
						break;
					err(1, "recv");
				}
				/*
				 * A zero-length read carries no packet and is
				 * also what end-of-file looks like.  Stop
				 * draining instead of retrying forever; a
				 * closed connection is caught by the hangup
				 * check on the next pass.
				 */
				if (n == 0)
					break;
				slirp_input_p(priv->slirp, buf, (int)n);
			}
		}
	}
}
/*
 * Parse "addr", a string of the form "[ipv4-address]:[port]", into *sinp.
 * The string is modified in place (the colon is overwritten with a NUL).
 *
 * An empty address selects INADDR_ANY and an empty port selects port 0.
 * The port must otherwise be a decimal number in the range 0-65535 with no
 * trailing characters.
 *
 * Returns 0 on success or an errno value on failure.
 */
static int
parse_addr(char *addr, struct sockaddr_in *sinp)
{
	char *end, *port;
	long porti;
	int error;

	memset(sinp, 0, sizeof(*sinp));
	sinp->sin_family = AF_INET;
	sinp->sin_len = sizeof(struct sockaddr_in);

	port = strchr(addr, ':');
	if (port == NULL)
		return (EINVAL);
	*port++ = '\0';

	if (strlen(addr) > 0) {
		error = inet_pton(AF_INET, addr, &sinp->sin_addr);
		if (error != 1)
			return (error == 0 ? EPFNOSUPPORT : errno);
	} else {
		sinp->sin_addr.s_addr = htonl(INADDR_ANY);
	}

	porti = 0;
	if (*port != '\0') {
		/*
		 * strtol() rather than atoi() so that garbage, trailing
		 * characters and out-of-range values are rejected instead of
		 * being silently turned into some port number.
		 */
		errno = 0;
		porti = strtol(port, &end, 10);
		if (end == port || *end != '\0' || errno != 0)
			return (EINVAL);
	}
	if (porti < 0 || porti > UINT16_MAX)
		return (EINVAL);
	sinp->sin_port = htons((uint16_t)porti);

	return (0);
}
/*
 * Parse one hostfwd rule of the form "proto:hostaddr:hostport-guestaddr:
 * guestport".  *is_udp is set to 1 when proto is exactly "udp" and to 0
 * otherwise; the two address halves are parsed by parse_addr() into
 * *hostaddr and *guestaddr, which are written as struct sockaddr_in.
 *
 * Returns 0 on success or an errno value on failure.
 */
static int
parse_hostfwd_rule(const char *descr, int *is_udp, struct sockaddr *hostaddr,
    struct sockaddr *guestaddr)
{
	char *copy, *hostspec, *guestspec;
	int error;

	*is_udp = 0;

	/* Work on a private copy since parsing splits the string in place. */
	copy = strdup(descr);
	if (copy == NULL)
		return (ENOMEM);

	error = EINVAL;
	hostspec = strchr(copy, ':');
	if (hostspec != NULL) {
		*hostspec++ = '\0';
		*is_udp = (strcmp(copy, "udp") == 0);

		guestspec = strchr(hostspec, '-');
		if (guestspec != NULL) {
			*guestspec++ = '\0';
			error = parse_addr(hostspec,
			    (struct sockaddr_in *)(void *)hostaddr);
			if (error == 0) {
				error = parse_addr(guestspec,
				    (struct sockaddr_in *)(void *)guestaddr);
			}
		}
	}

	free(copy);
	return (error);
}
/*
 * Parse a single hostfwd rule and install it in the given libslirp instance.
 * Any failure is fatal: the process exits with an error message.
 */
static void
config_one_hostfwd(Slirp *slirp, const char *rule)
{
	struct sockaddr host, guest;
	int flags, is_udp, rc;

	rc = parse_hostfwd_rule(rule, &is_udp, &host, &guest);
	if (rc != 0) {
		errx(1, "unable to parse hostfwd rule '%s': %s", rule,
		    strerror(rc));
	}

	flags = is_udp ? SLIRP_HOSTFWD_UDP : 0;
	rc = slirp_add_hostxfwd_p(slirp, &host, host.sa_len, &guest,
	    guest.sa_len, flags);
	if (rc != 0) {
		errx(1, "Unable to add hostfwd rule '%s': %s", rule,
		    strerror(errno));
	}
}
/*
 * Drop privileges to the "nobody" user.  Ideally we'd chroot to somewhere like
 * /var/empty but libslirp might need to access /etc/resolv.conf.
 *
 * Does nothing unless the effective user ID is 0.  Any failure is fatal.
 */
static void
drop_privs(void)
{
	struct passwd *pw;

	if (geteuid() != 0)
		return;

	/*
	 * getpwnam() returns NULL both on error and when the user simply does
	 * not exist; only the former sets errno, so clear it first to be able
	 * to tell the two apart.
	 */
	errno = 0;
	pw = getpwnam("nobody");
	if (pw == NULL) {
		if (errno != 0)
			err(1, "getpwnam(nobody) failed");
		errx(1, "getpwnam(nobody) failed: no such user");
	}

	/* Groups first: they can no longer be changed once the UID is gone. */
	if (initgroups(pw->pw_name, pw->pw_gid) != 0)
		err(1, "initgroups");
	if (setgid(pw->pw_gid) != 0)
		err(1, "setgid");
	if (setuid(pw->pw_uid) != 0)
		err(1, "setuid");
}
/*
 * Load libslirp.so.0 and fill in the slirp_*_p function pointer table.  The
 * process exits if the library or any of the required symbols is missing.
 */
static void
libslirp_init(void)
{
	void *lib;

	lib = dlopen("libslirp.so.0", RTLD_LAZY);
	if (lib == NULL)
		errx(1, "unable to open libslirp.so.0: %s", dlerror());

	/* Resolve "fn" into the matching fn##_p pointer, or die trying. */
#define	RESOLVE(fn) do {						\
	fn##_p = (fn##_p_t)dlsym(lib, #fn);				\
	if (fn##_p == NULL)						\
		errx(1, "failed to resolve %s", #fn);			\
} while (0)
	RESOLVE(slirp_add_hostxfwd);
	RESOLVE(slirp_cleanup);
	RESOLVE(slirp_input);
	RESOLVE(slirp_new);
	RESOLVE(slirp_pollfds_fill);
	RESOLVE(slirp_pollfds_poll);
#undef RESOLVE
}
/*
 * Print a usage message to stderr and exit with status 1.  The program is
 * installed as bhyve-slirp-helper, so that is the name shown.
 */
static void
usage(void)
{
	fprintf(stderr, "Usage: bhyve-slirp-helper -S <socket>\n");
	exit(1);
}
/*
 * Entry point.  The only option, -S <socket>, gives the number of an
 * inherited descriptor for the socket shared with the parent.  After tidying
 * the descriptor table, an nvlist describing the slirp configuration is read
 * from that socket, libslirp is set up accordingly, privileges are reduced and
 * the main loop is entered.
 */
int
main(int argc, char **argv)
{
	struct slirp_priv priv;
	SlirpConfig slirpconfig;
	Slirp *slirp;
	nvlist_t *config;
	const char *hostfwd, *vmname;
	char *end;
	long val;
	int ch, fd, sd;
	bool restricted;

	sd = -1;
	while ((ch = getopt(argc, argv, "S:")) != -1) {
		switch (ch) {
		case 'S':
			/*
			 * Parse strictly: atoi() would turn garbage into 0
			 * and make us treat stdin as the socket.
			 */
			errno = 0;
			val = strtol(optarg, &end, 10);
			sd = (int)val;
			if (end == optarg || *end != '\0' || errno != 0 ||
			    val < 0 || val != sd)
				errx(1, "invalid socket %s", optarg);
			if (fcntl(sd, F_GETFD) == -1)
				err(1, "invalid socket %s", optarg);
			break;
		default:
			usage();
			/* NOTREACHED */
		}
	}
	argc -= optind;
	argv += optind;

	if (sd == -1)
		usage();

	/*
	 * The stdio descriptors are about to be redirected, so move the
	 * socket out of their way first if it happens to be one of them.
	 */
	if (sd <= STDERR_FILENO) {
		sd = fcntl(sd, F_DUPFD, STDERR_FILENO + 1);
		if (sd == -1)
			err(1, "fcntl(F_DUPFD)");
	}

	/*
	 * Clean the fd space: point stdio to /dev/null and keep our socket.
	 */
	fd = open("/dev/null", O_RDWR);
	if (fd == -1)
		err(1, "open(/dev/null)");
	if (dup2(fd, STDIN_FILENO) == -1)
		err(1, "dup2(stdin)");
	if (dup2(fd, STDOUT_FILENO) == -1)
		err(1, "dup2(stdout)");
	if (dup2(fd, STDERR_FILENO) == -1)
		err(1, "dup2(stderr)");
	if (dup2(sd, 3) == -1)
		err(1, "dup2(slirp socket)");
	sd = 3;
	closefrom(sd + 1);

	memset(&priv, 0, sizeof(priv));
	priv.sock = sd;
	if (pipe2(priv.wakeup, O_CLOEXEC | O_NONBLOCK) != 0)
		err(1, "pipe2");

	/*
	 * Apply the configuration we received from bhyve.
	 */
	config = nvlist_recv(sd, 0);
	if (config == NULL)
		err(1, "nvlist_recv");
	vmname = get_config_value_node(config, "vmname");
	if (vmname != NULL)
		setproctitle("%s", vmname);
	restricted = !get_config_bool_node_default(config, "open", false);

	slirpconfig = (SlirpConfig){
		.version = 4,
		.if_mtu = SLIRP_MTU,
		.restricted = restricted,
		.in_enabled = true,
		.vnetwork.s_addr = htonl(0x0a000200),	/* 10.0.2.0/24 */
		.vnetmask.s_addr = htonl(0xffffff00),	/* 255.255.255.0 */
		.vdhcp_start.s_addr = htonl(0x0a00020f),/* 10.0.2.15 */
		.vhost.s_addr = htonl(0x0a000202),	/* 10.0.2.2 */
		.vnameserver.s_addr = htonl(0x0a000203),/* 10.0.2.3 */
		.enable_emu = false,
	};
	libslirp_init();
	slirp = slirp_new_p(&slirpconfig, &slirp_cbs, &priv);
	if (slirp == NULL)
		errx(1, "unable to create slirp instance");

	hostfwd = get_config_value_node(config, "hostfwd");
	if (hostfwd != NULL) {
		char *rules, *tofree;
		const char *rule;

		/* strsep() modifies its input, so split a private copy. */
		tofree = rules = strdup(hostfwd);
		if (rules == NULL)
			err(1, "strdup");
		while ((rule = strsep(&rules, ";")) != NULL)
			config_one_hostfwd(slirp, rule);
		free(tofree);
	}

	priv.slirp = slirp;

	/*
	 * Drop root privileges if we have them.  This has to happen before
	 * entering capability mode: looking up the "nobody" user and its
	 * groups requires opening the system databases by path, which is
	 * expected to be refused inside the sandbox.
	 */
	drop_privs();

	/*
	 * In restricted mode, we can enter a Capsicum sandbox without losing
	 * functionality.
	 */
	if (restricted && caph_enter() != 0)
		err(1, "caph_enter");

	/*
	 * Enter our main loop.  If bhyve goes away, we should observe a hangup
	 * on the socket and exit.
	 */
	slirp_pollfd_loop(&priv);
	/* NOTREACHED */
	return (1);
}