jail: add jail descriptors

Similar to process descriptors, jail descriptors allow jail
administration using the file descriptor interface instead of JIDs.
They come from and can be used by jail_set(2) and jail_get(2),
and there are two new system calls, jail_attach_jd(2) and
jail_remove_jd(2).

Reviewed by:	bz, brooks
Relnotes:	yes
Differential Revision:	https://reviews.freebsd.org/D43696
This commit is contained in:
Jamie Gritton 2025-09-04 13:27:47 -07:00
parent 2a346c8993
commit 851dc7f859
24 changed files with 1256 additions and 54 deletions

View file

@ -75,8 +75,9 @@ int
jail_setv(int flags, ...)
{
va_list ap, tap;
struct jailparam *jp;
const char *name, *value;
struct jailparam *jp, *jp_desc;
const char *name;
char *value, *desc_value;
int njp, jid;
/* Create the parameter list and import the parameters. */
@ -86,15 +87,24 @@ jail_setv(int flags, ...)
(void)va_arg(tap, char *);
va_end(tap);
jp = alloca(njp * sizeof(struct jailparam));
for (njp = 0; (name = va_arg(ap, char *)) != NULL;) {
jp_desc = NULL;
desc_value = NULL;
for (njp = 0; (name = va_arg(ap, char *)) != NULL; njp++) {
value = va_arg(ap, char *);
if (jailparam_init(jp + njp, name) < 0)
goto error;
if (jailparam_import(jp + njp++, value) < 0)
if (jailparam_import(jp + njp, value) < 0)
goto error;
if (!strcmp(name, "desc")
&& (flags & (JAIL_GET_DESC | JAIL_OWN_DESC))) {
jp_desc = jp + njp;
desc_value = value;
}
}
va_end(ap);
jid = jailparam_set(jp, njp, flags);
if (jid > 0 && jp_desc != NULL)
sprintf(desc_value, "%d", *(int *)jp_desc->jp_value);
jailparam_free(jp, njp);
return (jid);
@ -112,9 +122,10 @@ int
jail_getv(int flags, ...)
{
va_list ap, tap;
struct jailparam *jp, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
struct jailparam *jp, *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
char *valarg, *value;
const char *name, *key_value, *lastjid_value, *jid_value, *name_value;
const char *name, *key_value, *desc_value, *lastjid_value, *jid_value;
const char *name_value;
int njp, i, jid;
/* Create the parameter list and find the key. */
@ -126,15 +137,19 @@ jail_getv(int flags, ...)
jp = alloca(njp * sizeof(struct jailparam));
va_copy(tap, ap);
jp_lastjid = jp_jid = jp_name = NULL;
lastjid_value = jid_value = name_value = NULL;
jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
desc_value = lastjid_value = jid_value = name_value = NULL;
for (njp = 0; (name = va_arg(tap, char *)) != NULL; njp++) {
value = va_arg(tap, char *);
if (jailparam_init(jp + njp, name) < 0) {
va_end(tap);
goto error;
}
if (!strcmp(jp[njp].jp_name, "lastjid")) {
if (!strcmp(jp[njp].jp_name, "desc")
&& (flags & (JAIL_USE_DESC | JAIL_AT_DESC))) {
jp_desc = jp + njp;
desc_value = value;
} else if (!strcmp(jp[njp].jp_name, "lastjid")) {
jp_lastjid = jp + njp;
lastjid_value = value;
} else if (!strcmp(jp[njp].jp_name, "jid")) {
@ -147,7 +162,10 @@ jail_getv(int flags, ...)
}
va_end(tap);
/* Import the key parameter. */
if (jp_lastjid != NULL) {
if (jp_desc != NULL && (flags & JAIL_USE_DESC)) {
jp_key = jp_desc;
key_value = desc_value;
} else if (jp_lastjid != NULL) {
jp_key = jp_lastjid;
key_value = lastjid_value;
} else if (jp_jid != NULL && strtol(jid_value, NULL, 10) != 0) {
@ -163,6 +181,9 @@ jail_getv(int flags, ...)
}
if (jailparam_import(jp_key, key_value) < 0)
goto error;
if (jp_desc != NULL && jp_desc != jp_key
&& jailparam_import(jp_desc, desc_value) < 0)
goto error;
/* Get the jail and export the parameters. */
jid = jailparam_get(jp, njp, flags);
if (jid < 0)
@ -571,7 +592,7 @@ int
jailparam_get(struct jailparam *jp, unsigned njp, int flags)
{
struct iovec *jiov;
struct jailparam *jp_lastjid, *jp_jid, *jp_name, *jp_key;
struct jailparam *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
int i, ai, ki, jid, arrays, sanity;
unsigned j;
@ -580,10 +601,13 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
* Find the key and any array parameters.
*/
jiov = alloca(sizeof(struct iovec) * 2 * (njp + 1));
jp_lastjid = jp_jid = jp_name = NULL;
jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
arrays = 0;
for (ai = j = 0; j < njp; j++) {
if (!strcmp(jp[j].jp_name, "lastjid"))
if (!strcmp(jp[j].jp_name, "desc")
&& (flags & (JAIL_USE_DESC | JAIL_AT_DESC)))
jp_desc = jp + j;
else if (!strcmp(jp[j].jp_name, "lastjid"))
jp_lastjid = jp + j;
else if (!strcmp(jp[j].jp_name, "jid"))
jp_jid = jp + j;
@ -599,7 +623,9 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
ai++;
}
}
jp_key = jp_lastjid ? jp_lastjid :
jp_key = jp_desc && jp_desc->jp_valuelen == sizeof(int) &&
jp_desc->jp_value && (flags & JAIL_USE_DESC) ? jp_desc :
jp_lastjid ? jp_lastjid :
jp_jid && jp_jid->jp_valuelen == sizeof(int) &&
jp_jid->jp_value && *(int *)jp_jid->jp_value ? jp_jid : jp_name;
if (jp_key == NULL || jp_key->jp_value == NULL) {
@ -622,6 +648,14 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
jiov[ki].iov_len = JAIL_ERRMSGLEN;
ki++;
jail_errmsg[0] = 0;
if (jp_desc != NULL && jp_desc != jp_key) {
jiov[ki].iov_base = jp_desc->jp_name;
jiov[ki].iov_len = strlen(jp_desc->jp_name) + 1;
ki++;
jiov[ki].iov_base = jp_desc->jp_value;
jiov[ki].iov_len = jp_desc->jp_valuelen;
ki++;
}
if (arrays && jail_get(jiov, ki, flags) < 0) {
if (!jail_errmsg[0])
snprintf(jail_errmsg, sizeof(jail_errmsg),
@ -649,7 +683,7 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
jiov[ai].iov_base = jp[j].jp_value;
memset(jiov[ai].iov_base, 0, jiov[ai].iov_len);
ai++;
} else if (jp + j != jp_key) {
} else if (jp + j != jp_key && jp + j != jp_desc) {
jiov[i].iov_base = jp[j].jp_name;
jiov[i].iov_len = strlen(jp[j].jp_name) + 1;
i++;

View file

@ -382,6 +382,8 @@ FBSD_1.8 {
getrlimitusage;
inotify_add_watch_at;
inotify_rm_watch;
jail_attach_jd;
jail_remove_jd;
kcmp;
setcred;
setgroups;

View file

@ -468,6 +468,8 @@ typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t);
typedef int (__sys_inotify_rm_watch_t)(int, int);
typedef int (__sys_getgroups_t)(int, gid_t *);
typedef int (__sys_setgroups_t)(int, const gid_t *);
typedef int (__sys_jail_attach_jd_t)(int);
typedef int (__sys_jail_remove_jd_t)(int);
_Noreturn void __sys__exit(int rval);
int __sys_fork(void);
@ -872,6 +874,8 @@ int __sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask
int __sys_inotify_rm_watch(int fd, int wd);
int __sys_getgroups(int gidsetsize, gid_t * gidset);
int __sys_setgroups(int gidsetsize, const gid_t * gidset);
int __sys_jail_attach_jd(int fd);
int __sys_jail_remove_jd(int fd);
__END_DECLS
#endif /* __LIBSYS_H_ */

View file

@ -23,7 +23,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd November 29, 2023
.Dd September 4, 2025
.Dt JAIL 2
.Os
.Sh NAME
@ -31,7 +31,9 @@
.Nm jail_get ,
.Nm jail_set ,
.Nm jail_remove ,
.Nm jail_attach
.Nm jail_attach ,
.Nm jail_remove_jd ,
.Nm jail_attach_jd
.Nd create and manage system jails
.Sh LIBRARY
.Lb libc
@ -44,6 +46,10 @@
.Fn jail_attach "int jid"
.Ft int
.Fn jail_remove "int jid"
.Ft int
.Fn jail_attach_jd "int fd"
.Ft int
.Fn jail_remove_jd "int fd"
.In sys/uio.h
.Ft int
.Fn jail_get "struct iovec *iov" "u_int niov" "int flags"
@ -188,6 +194,29 @@ system call.
This is deprecated in
.Fn jail_set
and has no effect.
.It Dv JAIL_USE_DESC
Identify the jail by a descriptor in the
.Va desc
parameter.
.It Dv JAIL_AT_DESC
Operate in the context of the jail described by the
.Va desc
parameter, instead of the current jail.
Only one of
.Dv JAIL_USE_DESC
or
.Dv JAIL_AT_DESC
may be specified.
.It Dv JAIL_GET_DESC
Return a new jail descriptor for the jail in the
.Va desc
parameter.
.It Dv JAIL_OWN_DESC
Return an
.Dq owning
jail descriptor in the
.Va desc
parameter.
.El
.Pp
The
@ -221,6 +250,9 @@ arguments consists of one or more following flags:
.Bl -tag -width indent
.It Dv JAIL_DYING
Allow getting a jail that is in the process of being removed.
.It Dv JAIL_USE_DESC , Dv JAIL_AT_DESC , Dv JAIL_GET_DESC , Dv JAIL_OWN_DESC
These have the same meaning as they do in
.Fn jail_set .
.El
.Pp
The
@ -238,6 +270,101 @@ system call removes the jail identified by
.Fa jid .
It will kill all processes belonging to the jail, and remove any children
of that jail.
.Pp
The
.Fn jail_attach_jd
and
.Fn jail_remove_jd
system calls work the same as
.Fn jail_attach
and
.Fn jail_remove ,
except that they operate on the jail identified by jail descriptor
.Fa fd .
.Ss Jail Descriptors
In addition to the jail ID,
jails can be referred to using a jail descriptor,
a type of file descriptor tied to a particular jail.
Jail descriptors are created by calling
.Fn jail_set
or
.Fn jail_get
with the special parameter
.Va desc ,
and either the
.Dv JAIL_GET_DESC
or
.Dv JAIL_OWN_DESC
flags set.
The difference between the two flags is that descriptors created with
.Dv JAIL_OWN_DESC
.Po
called
.Dq owning
descriptors
.Pc
will automatically remove the jail when the descriptor is closed.
.Pp
Jail descriptors can be passed back to
.Fn jail_set
or
.Fn jail_get
with the
.Va desc
parameter,
and either the
.Dv JAIL_USE_DESC
or
.Dv JAIL_AT_DESC
flags set.
With
.Dv JAIL_USE_DESC ,
the descriptor identifies the jail to operate on,
instead of the
.Va jid
or
.Va name
parameter.
With
.Dv JAIL_AT_DESC ,
the descriptor is used in place of the current jail,
allowing accessing or creating jails that are children of the
descriptor jail.
.Pp
The system calls
.Fn jail_attach_jd
and
.Fn jail_remove_jd
work the same as
.Fn jail_attach
and
.Fn jail_remove ,
except that they operate on the jail referred to by the passed descriptor.
.Pp
Jail operations via descriptors can be done by processes that do not
normally have permission to see or affect the jail,
as long as they are allowed by the file permissions of the jail
descriptor itself.
These permissions can be changed by the descriptor owner via
.Xr fchmod 2
and
.Xr fchown 2 .
.Fn jail_get
requires read permission,
.Fn jail_set
and
.Fn jail_remove
require write permission,
and
.Fn jail_attach
requires execute permission.
Also, use of a descriptor with the
.Dv JAIL_AT_DESC
flag requires execute permission.
An owning descriptor is identified by the
.Em sticky bit ,
which may also be changed via
.Xr fchmod 2 .
.Sh RETURN VALUES
If successful,
.Fn jail ,
@ -249,7 +376,7 @@ They return \-1 on failure, and set
.Va errno
to indicate the error.
.Pp
.Rv -std jail_attach jail_remove
.Rv -std jail_attach jail_remove jail_attach_jd jail_remove_jd
.Sh ERRORS
The
.Fn jail
@ -275,12 +402,44 @@ The
system call
will fail if:
.Bl -tag -width Er
.It Bq Er EBADF
The
.Va desc
parameter does not refer to a valid jail descriptor,
and either the
.Dv JAIL_USE_DESC
or
.Dv JAIL_AT_DESC
flag was set.
.It Bq Er EACCES
Write permission is denied on the jail descriptor in the
.Va desc
parameter,
and the
.Dv JAIL_USE_DESC
flag was set.
.It Bq Er EACCES
Execute permission is denied on the jail descriptor in the
.Va desc
parameter,
and either the
.Dv JAIL_AT_DESC
or
.Dv JAIL_ATTACH
flag was set.
.It Bq Er EPERM
This process is not allowed to create a jail, either because it is not
the super-user, or because it would exceed the jail's
.Va children.max
limit.
.It Bq Er EPERM
The jail descriptor in the
.Va desc
parameter was created by a user other than the super-user,
and the
.Dv JAIL_USE_DESC
flag was set.
.It Bq Er EPERM
A jail parameter was set to a less restrictive value than the current
environment.
.It Bq Er EFAULT
@ -298,8 +457,12 @@ flag is not set.
.It Bq Er ENOENT
The jail referred to by a
.Va jid
is not accessible by the process, because the process is in a different
jail.
parameter is not accessible by the process, because the process is in a
different jail.
.It Bq Er ENOENT
The jail referred to by a
.Va desc
parameter has been removed.
.It Bq Er EEXIST
The jail referred to by a
.Va jid
@ -326,6 +489,24 @@ flags is not set.
A supplied string parameter is longer than allowed.
.It Bq Er EAGAIN
There are no jail IDs left.
.It Bq Er EMFILE
A jail descriptor could not be created for the
.Va desc
parameter with either the
.Dv JAIL_GET_DESC
or
.Dv JAIL_OWN_DESC
flag set,
because the process has already reached its limit for open file descriptors.
.It Bq Er ENFILE
A jail descriptor could not be created for the
.Va desc
parameter with either the
.Dv JAIL_GET_DESC
or
.Dv JAIL_OWN_DESC
flag set,
because the system file table is full.
.El
.Pp
The
@ -333,6 +514,29 @@ The
system call
will fail if:
.Bl -tag -width Er
.It Bq Er EBADF
The
.Va desc
parameter does not refer to a valid jail descriptor,
and either the
.Dv JAIL_USE_DESC
or
.Dv JAIL_AT_DESC
flag was set.
.It Bq Er EACCES
Read permission is denied on the jail descriptor in the
.Va desc
parameter,
and the
.Dv JAIL_USE_DESC
flag was set.
.It Bq Er EACCES
Execute permission is denied on the jail descriptor in the
.Va desc
parameter,
and the
.Dv JAIL_AT_DESC
flag was set.
.It Bq Er EFAULT
.Fa Iov ,
or one of the addresses contained within it,
@ -352,10 +556,33 @@ jail.
The
.Va lastjid
parameter is greater than the highest current jail ID.
.It Bq Er ENOENT
The jail referred to by a
.Va desc
parameter has been removed
.Pq even if the Dv JAIL_DYING flag has been set .
.It Bq Er EINVAL
A supplied parameter is the wrong size.
.It Bq Er EINVAL
A supplied parameter name does not match any known parameters.
.It Bq Er EMFILE
A jail descriptor could not be created for the
.Va desc
parameter with either the
.Dv JAIL_GET_DESC
or
.Dv JAIL_OWN_DESC
flag set,
because the process has already reached its limit for open file descriptors.
.It Bq Er ENFILE
A jail descriptor could not be created for the
.Va desc
parameter with either the
.Dv JAIL_GET_DESC
or
.Dv JAIL_OWN_DESC
flag set,
because the system file table is full.
.El
.Pp
The
@ -373,11 +600,39 @@ The jail specified by
does not exist.
.El
.Pp
The
.Fn jail_attach_jd
and
.Fn jail_remove_jd
system calls
will fail if:
.Bl -tag -width Er
.It Bq Er EBADF
The
.Fa fd
argument is not a valid jail descriptor.
.It Bq Er EACCES
Permission is denied on the jail descriptor
.Po
execute permission for
.Fn jail_attach_jd ,
or write permission for
.Fn jail_remove_jd
.Pc .
.It Bq Er EPERM
The jail descriptor was created by a user other than the super-user.
.It Bq Er EINVAL
The jail specified by
.Fa fd
has been removed.
.El
.Pp
Further
.Fn jail ,
.Fn jail_set ,
.Fn jail_attach ,
and
.Fn jail_attach
.Fn jail_attach_jd
call
.Xr chroot 2
internally, so they can fail for all the same reasons.

View file

@ -813,4 +813,8 @@ FBSDprivate_1.0 {
__sys_getgroups;
_setgroups;
__sys_setgroups;
_jail_attach_jd;
__sys_jail_attach_jd;
_jail_remove_jd;
__sys_jail_remove_jd;
};

View file

@ -515,4 +515,6 @@
#define FREEBSD32_SYS_inotify_rm_watch 594
#define FREEBSD32_SYS_getgroups 595
#define FREEBSD32_SYS_setgroups 596
#define FREEBSD32_SYS_MAXSYSCALL 597
#define FREEBSD32_SYS_jail_attach_jd 597
#define FREEBSD32_SYS_jail_remove_jd 598
#define FREEBSD32_SYS_MAXSYSCALL 599

View file

@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
"jail_attach_jd", /* 597 = jail_attach_jd */
"jail_remove_jd", /* 598 = jail_remove_jd */
};

View file

@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
{ .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
{ .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};

View file

@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
/* jail_attach_jd */
case 597: {
struct jail_attach_jd_args *p = params;
iarg[a++] = p->fd; /* int */
*n_args = 1;
break;
}
/* jail_remove_jd */
case 598: {
struct jail_remove_jd_args *p = params;
iarg[a++] = p->fd; /* int */
*n_args = 1;
break;
}
default:
*n_args = 0;
break;
@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
/* jail_attach_jd */
case 597:
switch (ndx) {
case 0:
p = "int";
break;
default:
break;
};
break;
/* jail_remove_jd */
case 598:
switch (ndx) {
case 0:
p = "int";
break;
default:
break;
};
break;
default:
break;
};
@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
/* jail_attach_jd */
case 597:
if (ndx == 0 || ndx == 1)
p = "int";
break;
/* jail_remove_jd */
case 598:
if (ndx == 0 || ndx == 1)
p = "int";
break;
default:
break;
};

View file

@ -3808,6 +3808,7 @@ kern/kern_hhook.c standard
kern/kern_idle.c standard
kern/kern_intr.c standard
kern/kern_jail.c standard
kern/kern_jaildesc.c standard
kern/kern_jailmeta.c standard
kern/kern_kcov.c optional kcov \
compile-with "${NOSAN_C} ${MSAN_CFLAGS}"

View file

@ -663,4 +663,6 @@ struct sysent sysent[] = {
{ .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
{ .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, .sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 595 = getgroups */
{ .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, .sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 596 = setgroups */
{ .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t *)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 597 = jail_attach_jd */
{ .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t *)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 598 = jail_remove_jd */
};

View file

@ -5250,6 +5250,8 @@ file_type_to_name(short type)
return ("eventfd");
case DTYPE_TIMERFD:
return ("timerfd");
case DTYPE_JAILDESC:
return ("jail");
default:
return ("unkn");
}

View file

@ -39,6 +39,7 @@
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/osd.h>
@ -49,6 +50,7 @@
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/jaildesc.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/mman.h>
@ -988,6 +990,8 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af)
int
kern_jail_set(struct thread *td, struct uio *optuio, int flags)
{
struct file *jfp_out;
struct jaildesc *desc_in;
struct nameidata nd;
#ifdef INET
struct prison_ip *ip4;
@ -998,6 +1002,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
struct vfsopt *opt;
struct vfsoptlist *opts;
struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr;
struct ucred *jdcred;
struct vnode *root;
char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
char *g_path, *osrelstr;
@ -1011,7 +1016,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
int created, cuflags, descend, drflags, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
int deadid, jid, jsys, len, level;
int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
#ifdef INET
int ip4s;
@ -1027,17 +1032,26 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
unsigned tallow;
char numbuf[12];
error = priv_check(td, PRIV_JAIL_SET);
if (!error && (flags & JAIL_ATTACH))
error = priv_check(td, PRIV_JAIL_ATTACH);
if (error)
return (error);
mypr = td->td_ucred->cr_prison;
if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE)
&& mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
return (EINVAL);
if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
== (JAIL_USE_DESC | JAIL_AT_DESC))
return (EINVAL);
prison_hold(mypr);
#ifdef INET
ip4 = NULL;
#endif
#ifdef INET6
ip6 = NULL;
#endif
g_path = NULL;
jfp_out = NULL;
jfd_out = -1;
/*
* Check all the parameters before committing to anything. Not all
* errors can be caught early, but we may as well try. Also, this
@ -1050,14 +1064,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
*/
error = vfs_buildopts(optuio, &opts);
if (error)
return (error);
#ifdef INET
ip4 = NULL;
#endif
#ifdef INET6
ip6 = NULL;
#endif
g_path = NULL;
goto done_free;
cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
if (!cuflags) {
@ -1066,6 +1073,72 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_errmsg;
}
error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
if (error == ENOENT) {
if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
JAIL_OWN_DESC)) {
vfs_opterror(opts, "missing desc");
goto done_errmsg;
}
jfd_in = -1;
} else if (error != 0)
goto done_free;
else {
if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
JAIL_OWN_DESC))) {
vfs_opterror(opts, "unexpected desc");
goto done_errmsg;
}
if (flags & JAIL_AT_DESC) {
/*
* Look up and create jails based on the
* descriptor's prison.
*/
prison_free(mypr);
error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
NULL);
if (error != 0) {
vfs_opterror(opts, error == ENOENT
? "descriptor to dead jail"
: "not a jail descriptor");
goto done_errmsg;
}
/*
* Check file permissions using the current
* credentials, and operation permissions
* using the descriptor's credentials.
*/
error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
desc_in->jd_gid, VEXEC, td->td_ucred);
JAILDESC_UNLOCK(desc_in);
if (error != 0)
goto done_free;
if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) {
error = EPERM;
goto done_free;
}
}
if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
/* Allocate a jail descriptor to return later. */
error = jaildesc_alloc(td, &jfp_out, &jfd_out,
flags & JAIL_OWN_DESC);
if (error)
goto done_free;
}
}
/*
* Delay the permission check if using a jail descriptor,
* until we get the descriptor's credentials.
*/
if (!(flags & JAIL_USE_DESC)) {
error = priv_check(td, PRIV_JAIL_SET);
if (error == 0 && (flags & JAIL_ATTACH))
error = priv_check(td, PRIV_JAIL_ATTACH);
if (error)
goto done_free;
}
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@ -1441,7 +1514,57 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = EAGAIN;
goto done_deref;
}
if (jid != 0) {
if (flags & JAIL_USE_DESC) {
/* Get the jail from its descriptor. */
error = jaildesc_find(td, jfd_in, &desc_in, &pr, &jdcred);
if (error) {
vfs_opterror(opts, error == ENOENT
? "descriptor to dead jail"
: "not a jail descriptor");
goto done_deref;
}
drflags |= PD_DEREF;
/*
* Check file permissions using the current credentials,
* and operation permissions using the descriptor's
* credentials.
*/
error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
desc_in->jd_gid, VWRITE, td->td_ucred);
if (error == 0 && (flags & JAIL_ATTACH))
error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
desc_in->jd_gid, VEXEC, td->td_ucred);
JAILDESC_UNLOCK(desc_in);
if (error == 0)
error = priv_check_cred(jdcred, PRIV_JAIL_SET);
if (error == 0 && (flags & JAIL_ATTACH))
error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
crfree(jdcred);
if (error)
goto done_deref;
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
if (cuflags == JAIL_CREATE) {
error = EEXIST;
vfs_opterror(opts, "jail %d already exists",
pr->pr_id);
goto done_deref;
}
if (!prison_isalive(pr)) {
/* While a jid can be resurrected, the prison
* itself cannot.
*/
error = ENOENT;
vfs_opterror(opts, "jail %d is dying", pr->pr_id);
goto done_deref;
}
if (jid != 0 && jid != pr->pr_id) {
error = EINVAL;
vfs_opterror(opts, "cannot change jid");
goto done_deref;
}
jid = pr->pr_id;
} else if (jid != 0) {
if (jid < 0) {
error = EINVAL;
vfs_opterror(opts, "negative jid");
@ -1575,7 +1698,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
}
/* Update: must provide a jid or name. */
/* Update: must provide a desc, jid, or name. */
else if (cuflags == JAIL_UPDATE && pr == NULL) {
error = ENOENT;
vfs_opterror(opts, "update specified no jail");
@ -1728,8 +1851,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* Grab a reference for existing prisons, to ensure they
* continue to exist for the duration of the call.
*/
prison_hold(pr);
drflags |= PD_DEREF;
if (!(drflags & PD_DEREF)) {
prison_hold(pr);
drflags |= PD_DEREF;
}
#if defined(VIMAGE) && (defined(INET) || defined(INET6))
if ((pr->pr_flags & PR_VNET) &&
(ch_flags & (PR_IP4_USER | PR_IP6_USER))) {
@ -2158,6 +2283,26 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
printf("Warning jail jid=%d: mountd/nfsd requires a separate"
" file system\n", pr->pr_id);
/*
* Now that the prison is fully created without error, set the
* jail descriptor if one was requested. This is the only
* parameter that is returned to the caller (except the error
* message).
*/
if (jfd_out >= 0) {
if (!(drflags & PD_LOCKED)) {
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
}
jfd_pos = 2 * vfs_getopt_pos(opts, "desc") + 1;
if (optuio->uio_segflg == UIO_SYSSPACE)
*(int*)optuio->uio_iov[jfd_pos].iov_base = jfd_out;
else
(void)copyout(&jfd_out,
optuio->uio_iov[jfd_pos].iov_base, sizeof(jfd_out));
jaildesc_set_prison(jfp_out, pr);
}
drflags &= ~PD_KILL;
td->td_retval[0] = pr->pr_id;
@ -2195,15 +2340,21 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
done_free:
/* Clean up other resources. */
#ifdef INET
prison_ip_free(ip4);
#endif
#ifdef INET6
prison_ip_free(ip6);
#endif
if (jfp_out != NULL)
fdrop(jfp_out, td);
if (error && jfd_out >= 0)
(void)kern_close(td, jfd_out);
if (g_path != NULL)
free(g_path, M_TEMP);
vfs_freeopts(opts);
prison_free(mypr);
return (error);
}
@ -2348,16 +2499,22 @@ int
kern_jail_get(struct thread *td, struct uio *optuio, int flags)
{
struct bool_flags *bf;
struct file *jfp_out;
struct jaildesc *desc_in;
struct jailsys_flags *jsf;
struct prison *pr, *mypr;
struct vfsopt *opt;
struct vfsoptlist *opts;
char *errmsg, *name;
int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos;
int jfd_in, jfd_out;
unsigned f;
if (flags & ~JAIL_GET_MASK)
return (EINVAL);
if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
== (JAIL_USE_DESC | JAIL_AT_DESC))
return (EINVAL);
/* Get the parameter list. */
error = vfs_buildopts(optuio, &opts);
@ -2365,13 +2522,81 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
return (error);
errmsg_pos = vfs_getopt_pos(opts, "errmsg");
mypr = td->td_ucred->cr_prison;
prison_hold(mypr);
pr = NULL;
jfp_out = NULL;
jfd_out = -1;
/*
* Find the prison specified by one of: lastjid, jid, name.
* Find the prison specified by one of: desc, lastjid, jid, name.
*/
sx_slock(&allprison_lock);
drflags = PD_LIST_SLOCKED;
error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
if (error == ENOENT) {
if (flags & (JAIL_AT_DESC | JAIL_GET_DESC | JAIL_OWN_DESC)) {
vfs_opterror(opts, "missing desc");
goto done;
}
} else if (error == 0) {
if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
JAIL_OWN_DESC))) {
vfs_opterror(opts, "unexpected desc");
goto done;
}
if (flags & JAIL_USE_DESC) {
/* Get the jail from its descriptor. */
error = jaildesc_find(td, jfd_in, &desc_in, &pr, NULL);
if (error) {
vfs_opterror(opts, error == ENOENT
? "descriptor to dead jail"
: "not a jail descriptor");
goto done;
}
drflags |= PD_DEREF;
error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
desc_in->jd_gid, VREAD, td->td_ucred);
JAILDESC_UNLOCK(desc_in);
if (error != 0)
goto done;
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
if (!(prison_isalive(pr) || (flags & JAIL_DYING))) {
error = ENOENT;
vfs_opterror(opts, "jail %d is dying",
pr->pr_id);
goto done;
}
goto found_prison;
}
if (flags & JAIL_AT_DESC) {
/* Look up jails based on the descriptor's prison. */
prison_free(mypr);
error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
NULL);
if (error != 0) {
vfs_opterror(opts, error == ENOENT
? "descriptor to dead jail"
: "not a jail descriptor");
goto done;
}
error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
desc_in->jd_gid, VEXEC, td->td_ucred);
JAILDESC_UNLOCK(desc_in);
if (error != 0)
goto done;
}
if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
/* Allocate a jail descriptor to return later. */
error = jaildesc_alloc(td, &jfp_out, &jfd_out,
flags & JAIL_OWN_DESC);
if (error)
goto done;
}
} else
goto done;
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
@ -2440,9 +2665,17 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
found_prison:
/* Get the parameters of the prison. */
prison_hold(pr);
drflags |= PD_DEREF;
if (!(drflags & PD_DEREF)) {
prison_hold(pr);
drflags |= PD_DEREF;
}
td->td_retval[0] = pr->pr_id;
if (jfd_out >= 0) {
error = vfs_setopt(opts, "desc", &jfd_out, sizeof(jfd_out));
if (error != 0 && error != ENOENT)
goto done;
jaildesc_set_prison(jfp_out, pr);
}
error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
if (error != 0 && error != ENOENT)
goto done;
@ -2622,6 +2855,13 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
prison_deref(pr, drflags);
else if (drflags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
else if (drflags & PD_LIST_XLOCKED)
sx_xunlock(&allprison_lock);
/* Clean up other resources. */
if (jfp_out != NULL)
(void)fdrop(jfp_out, td);
if (error && jfd_out >= 0)
(void)kern_close(td, jfd_out);
if (error && errmsg_pos >= 0) {
/* Write the error message back to userspace. */
vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
@ -2638,6 +2878,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
}
}
vfs_freeopts(opts);
prison_free(mypr);
return (error);
}
@ -2662,14 +2903,63 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
sx_xunlock(&allprison_lock);
return (EINVAL);
}
prison_hold(pr);
prison_remove(pr);
return (0);
}
/*
* struct jail_remove_jd_args {
* int fd;
* };
*/
/*
 * Remove the jail that the jail descriptor uap->fd refers to.
 *
 * The caller needs write permission on the descriptor (checked against
 * the calling thread's credentials), and the credentials stored with the
 * descriptor must pass the PRIV_JAIL_REMOVE check.  Returns 0 once the
 * removal has been handed to prison_remove(), or an errno value from the
 * descriptor lookup or one of the permission checks.
 */
int
sys_jail_remove_jd(struct thread *td, struct jail_remove_jd_args *uap)
{
	struct ucred *desc_cred;
	struct jaildesc *desc;
	struct prison *target;
	int rc;

	rc = jaildesc_find(td, uap->fd, &desc, &target, &desc_cred);
	if (rc != 0)
		return (rc);

	/*
	 * File permission is judged with the caller's credentials; the
	 * privilege to remove a jail is judged with the descriptor's.
	 */
	rc = vaccess(VREG, desc->jd_mode, desc->jd_uid, desc->jd_gid,
	    VWRITE, td->td_ucred);
	JAILDESC_UNLOCK(desc);
	if (rc == 0)
		rc = priv_check_cred(desc_cred, PRIV_JAIL_REMOVE);
	crfree(desc_cred);

	if (rc == 0) {
		/*
		 * prison_remove() expects the allprison lock held
		 * exclusively and the prison both locked and held.
		 */
		sx_xlock(&allprison_lock);
		mtx_lock(&target->pr_mtx);
		prison_remove(target);
		return (0);
	}

	/* Not permitted: give back the reference taken by the lookup. */
	prison_free(target);
	return (rc);
}
/*
* Begin the removal process for a prison. The allprison lock should
* be held exclusively, and the prison should be both locked and held.
*/
void
prison_remove(struct prison *pr)
{
sx_assert(&allprison_lock, SA_XLOCKED);
mtx_assert(&pr->pr_mtx, MA_OWNED);
if (!prison_isalive(pr)) {
/* Silently ignore already-dying prisons. */
mtx_unlock(&pr->pr_mtx);
sx_xunlock(&allprison_lock);
return (0);
return;
}
prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED);
return (0);
prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
}
/*
@ -2704,6 +2994,53 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
}
/*
* struct jail_attach_jd_args {
* int fd;
* };
*/
/*
 * Attach the calling process to the jail that the jail descriptor
 * uap->fd refers to.
 *
 * The caller needs execute permission on the descriptor (checked against
 * the calling thread's credentials), and the credentials stored with the
 * descriptor must pass the PRIV_JAIL_ATTACH check.  Returns the result of
 * do_jail_attach() on the success path, EINVAL if the jail is no longer
 * alive, or an errno value from the descriptor lookup or one of the
 * permission checks.
 */
int
sys_jail_attach_jd(struct thread *td, struct jail_attach_jd_args *uap)
{
	struct jaildesc *jd;
	struct prison *pr;
	struct ucred *jdcred;
	int drflags, error;

	sx_slock(&allprison_lock);
	drflags = PD_LIST_SLOCKED;
	error = jaildesc_find(td, uap->fd, &jd, &pr, &jdcred);
	if (error) {
		/*
		 * The lookup failed, so pr was never assigned.  Do not
		 * hand it to prison_deref(); just drop the list lock.
		 */
		sx_sunlock(&allprison_lock);
		return (error);
	}
	/* From here on pr is treated as held and must be dereferenced. */
	drflags |= PD_DEREF;

	/*
	 * Check file permissions using the current credentials, and
	 * operation permissions using the descriptor's credentials.
	 */
	error = vaccess(VREG, jd->jd_mode, jd->jd_uid, jd->jd_gid, VEXEC,
	    td->td_ucred);
	JAILDESC_UNLOCK(jd);
	if (error == 0)
		error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
	crfree(jdcred);
	if (error)
		goto fail;

	mtx_lock(&pr->pr_mtx);
	drflags |= PD_LOCKED;

	/* Do not allow a process to attach to a prison that is not alive. */
	if (!prison_isalive(pr)) {
		error = EINVAL;
		goto fail;
	}

	/* do_jail_attach() takes over the locks and reference in drflags. */
	return (do_jail_attach(td, pr, drflags));

fail:
	prison_deref(pr, drflags);
	return (error);
}
static int
do_jail_attach(struct thread *td, struct prison *pr, int drflags)
{
@ -2722,9 +3059,12 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
* a process root from one prison, but attached to the jail
* of another.
*/
prison_hold(pr);
if (!(drflags & PD_DEREF)) {
prison_hold(pr);
drflags |= PD_DEREF;
}
refcount_acquire(&pr->pr_uref);
drflags |= PD_DEREF | PD_DEUREF;
drflags |= PD_DEUREF;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
@ -3444,6 +3784,7 @@ prison_cleanup_locked(struct prison *pr)
mtx_assert(&pr->pr_mtx, MA_OWNED);
prison_knote(pr, NOTE_JAIL_REMOVE);
knlist_detach(pr->pr_klist);
jaildesc_prison_cleanup(pr);
pr->pr_klist = NULL;
}
@ -4650,6 +4991,7 @@ sysctl_jail_param(SYSCTL_HANDLER_ARGS)
* jail creation time but cannot be changed in an existing jail.
*/
SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID");
SYSCTL_JAIL_PARAM(, desc, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail descriptor");
SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID");
SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name");
SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path");

337
sys/kern/kern_jaildesc.c Normal file
View file

@ -0,0 +1,337 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 James Gritton.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/jail.h>
#include <sys/jaildesc.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/ucred.h>
#include <sys/vnode.h>
MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
static fo_stat_t jaildesc_stat;
static fo_close_t jaildesc_close;
static fo_chmod_t jaildesc_chmod;
static fo_chown_t jaildesc_chown;
static fo_fill_kinfo_t jaildesc_fill_kinfo;
static fo_cmp_t jaildesc_cmp;
/*
* File operations for jail descriptors. Only stat, close, chmod, chown,
* fill_kinfo and cmp have jaildesc-specific handlers; every other
* operation is routed to the generic invfo_* handlers.
* NOTE(review): DFLAG_PASSABLE presumably allows the descriptor to be
* passed between processes - confirm against the fileops documentation.
*/
static struct fileops jaildesc_ops = {
.fo_read = invfo_rdwr,
.fo_write = invfo_rdwr,
.fo_truncate = invfo_truncate,
.fo_ioctl = invfo_ioctl,
.fo_poll = invfo_poll,
.fo_kqfilter = invfo_kqfilter,
.fo_stat = jaildesc_stat,
.fo_close = jaildesc_close,
.fo_chmod = jaildesc_chmod,
.fo_chown = jaildesc_chown,
.fo_sendfile = invfo_sendfile,
.fo_fill_kinfo = jaildesc_fill_kinfo,
.fo_cmp = jaildesc_cmp,
.fo_flags = DFLAG_PASSABLE,
};
/*
 * Look up a jail descriptor by number and hand back what hangs off it.
 *
 * On success (return value 0) *prp is the descriptor's prison, held.
 * If jdp is non-NULL, *jdp is the jaildesc and it is returned locked;
 * otherwise the jaildesc is unlocked before returning.  If ucredp is
 * non-NULL, *ucredp is a held reference to the file's credential.
 *
 * Fails with the error from fget, with EBADF when the file is not a
 * jail descriptor, and with ENOENT when the descriptor has no prison
 * or the prison is not valid.  No output is written on failure.
 */
int
jaildesc_find(struct thread *td, int fd, struct jaildesc **jdp,
    struct prison **prp, struct ucred **ucredp)
{
	struct file *fp;
	struct jaildesc *desc;
	struct prison *target;
	int error;

	error = fget(td, fd, &cap_no_rights, &fp);
	if (error != 0)
		return (error);

	if (fp->f_type == DTYPE_JAILDESC) {
		desc = fp->f_data;
		JAILDESC_LOCK(desc);
		target = desc->jd_prison;
		if (target != NULL && prison_isvalid(target)) {
			prison_hold(target);
			*prp = target;
			/* Keep the jaildesc locked only if the caller wants it. */
			if (jdp == NULL)
				JAILDESC_UNLOCK(desc);
			else
				*jdp = desc;
			if (ucredp != NULL)
				*ucredp = crhold(fp->f_cred);
		} else {
			JAILDESC_UNLOCK(desc);
			error = ENOENT;
		}
	} else
		error = EBADF;

	/* The file reference from fget is dropped on every path. */
	fdrop(fp, td);
	return (error);
}
/*
 * Allocate a new jail descriptor, not yet associated with a prison.
 * Return the file pointer (with a reference held) and the descriptor
 * number.
 *
 * An owning descriptor requires PRIV_JAIL_REMOVE and is marked with
 * S_ISTXT in its mode.  Returns 0 on success, or the error from the
 * privilege check or from falloc_caps; nothing is written to *fpp on
 * failure.
 */
int
jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
{
	struct file *fp;
	struct jaildesc *jd;
	int error;
	mode_t mode;

	if (owning) {
		error = priv_check(td, PRIV_JAIL_REMOVE);
		if (error != 0)
			return (error);
		mode = S_ISTXT;
	} else
		mode = 0;
	jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
	error = falloc_caps(td, &fp, fdp, 0, NULL);
	if (error != 0) {
		/* No file was allocated, so fp must not be touched. */
		free(jd, M_JAILDESC);
		return (error);
	}
	/*
	 * Finish setting up the jaildesc before finit() installs it as
	 * the file's data, so the file operations never see a lock that
	 * has not been initialized.
	 */
	JAILDESC_LOCK_INIT(jd);
	jd->jd_uid = fp->f_cred->cr_uid;
	jd->jd_gid = fp->f_cred->cr_gid;
	jd->jd_mode = S_IFREG | S_IRUSR | S_IRGRP | S_IROTH | mode
	    | (priv_check(td, PRIV_JAIL_SET) == 0 ? S_IWUSR | S_IXUSR : 0)
	    | (priv_check(td, PRIV_JAIL_ATTACH) == 0 ? S_IXUSR : 0);
	/* The file is writable only if its creator may modify jails. */
	finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0
	    ? FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
	*fpp = fp;
	return (0);
}
/*
* Associate a jail descriptor with its prison. The caller must own the
* prison's mutex (asserted below). The descriptor is linked onto the
* prison's pr_descs list and takes its own reference on the prison.
*/
void
jaildesc_set_prison(struct file *fp, struct prison *pr)
{
struct jaildesc *jd;
mtx_assert(&pr->pr_mtx, MA_OWNED);
jd = fp->f_data;
JAILDESC_LOCK(jd);
jd->jd_prison = pr;
LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
/* This reference belongs to the descriptor itself. */
prison_hold(pr);
JAILDESC_UNLOCK(jd);
}
/*
* Detach all jail descriptors from a prison. The caller must own the
* prison's mutex (asserted below). Each descriptor is unlinked from
* pr_descs, has its prison pointer cleared, and gives up one prison
* reference.
*/
void
jaildesc_prison_cleanup(struct prison *pr)
{
struct jaildesc *jd;
mtx_assert(&pr->pr_mtx, MA_OWNED);
/* Pop descriptors off the head until the list is empty. */
while ((jd = LIST_FIRST(&pr->pr_descs))) {
JAILDESC_LOCK(jd);
LIST_REMOVE(jd, jd_list);
jd->jd_prison = NULL;
JAILDESC_UNLOCK(jd);
/* Release the reference taken when the descriptor was attached. */
prison_free(pr);
}
}
/*
* Close handler for jail descriptors. Detaches the jaildesc from its
* prison, if it still has one, and frees the jaildesc. A descriptor
* whose mode has S_ISTXT set also starts removal of the prison; any
* other descriptor only gives up its prison reference. Always returns 0.
*/
static int
jaildesc_close(struct file *fp, struct thread *td)
{
struct jaildesc *jd;
struct prison *pr;
jd = fp->f_data;
fp->f_data = NULL;
if (jd != NULL) {
JAILDESC_LOCK(jd);
pr = jd->jd_prison;
if (pr != NULL) {
/*
* Free or remove the associated prison.
* This requires a second check after re-
* ordering locks. This jaildesc can remain
* unlocked once we have a prison reference,
* because that prison is the only place that
* still points back to it.
*/
prison_hold(pr);
JAILDESC_UNLOCK(jd);
if (jd->jd_mode & S_ISTXT) {
sx_xlock(&allprison_lock);
prison_lock(pr);
if (jd->jd_prison != NULL) {
/*
* Unlink the prison, but don't free
* it; that will be done as part of
* prison_remove.
*/
LIST_REMOVE(jd, jd_list);
prison_remove(pr);
} else {
/* Already detached; just undo the locking. */
prison_unlock(pr);
sx_xunlock(&allprison_lock);
}
} else {
prison_lock(pr);
if (jd->jd_prison != NULL) {
LIST_REMOVE(jd, jd_list);
/* Drop the descriptor's own prison reference. */
prison_free(pr);
}
prison_unlock(pr);
}
/* Drop the temporary hold taken above. */
prison_free(pr);
}
JAILDESC_LOCK_DESTROY(jd);
free(jd, M_JAILDESC);
}
/* Reset the file so it no longer refers to jaildesc_ops. */
finit(fp, 0, DTYPE_NONE, NULL, &badfileops);
return (0);
}
/*
 * Stat handler for jail descriptors.  The stat buffer is zeroed first.
 * While the descriptor is attached to a prison, st_ino is the prison's
 * pr_id and the owner, group and mode are the descriptor's nominal
 * values.  A detached descriptor reports only a mode of S_IFREG.
 * Always returns 0.
 */
static int
jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
{
	struct jaildesc *jd;

	bzero(sb, sizeof(struct stat));
	jd = fp->f_data;
	JAILDESC_LOCK(jd);
	if (jd->jd_prison != NULL) {
		/* jd_prison was just checked, so it can be used directly. */
		sb->st_ino = jd->jd_prison->pr_id;
		sb->st_uid = jd->jd_uid;
		sb->st_gid = jd->jd_gid;
		sb->st_mode = jd->jd_mode;
	} else
		sb->st_mode = S_IFREG;
	JAILDESC_UNLOCK(jd);
	return (0);
}
/*
 * Chmod handler for jail descriptors.
 *
 * The requested mode may not grant anything the descriptor's creator
 * (fp->f_cred) lacks the privilege for: write bits need PRIV_JAIL_SET,
 * execute bits need PRIV_JAIL_ATTACH or PRIV_JAIL_SET, and S_ISTXT
 * needs PRIV_JAIL_REMOVE; otherwise EPERM is returned.  Set-id bits
 * are rejected with EINVAL.  The change itself is subject to a VADMIN
 * access check against active_cred, whose error is returned.
 */
static int
jaildesc_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{
	const mode_t wbits = S_IWUSR | S_IWGRP | S_IWOTH;
	const mode_t xbits = S_IXUSR | S_IXGRP | S_IXOTH;
	struct jaildesc *desc;
	int rc;

	/* Reject permissions that the creator doesn't have. */
	if ((mode & wbits) != 0 &&
	    priv_check_cred(fp->f_cred, PRIV_JAIL_SET) != 0)
		return (EPERM);
	if ((mode & xbits) != 0 &&
	    priv_check_cred(fp->f_cred, PRIV_JAIL_ATTACH) != 0 &&
	    priv_check_cred(fp->f_cred, PRIV_JAIL_SET) != 0)
		return (EPERM);
	if ((mode & S_ISTXT) != 0 &&
	    priv_check_cred(fp->f_cred, PRIV_JAIL_REMOVE) != 0)
		return (EPERM);

	if ((mode & (S_ISUID | S_ISGID)) != 0)
		return (EINVAL);

	desc = fp->f_data;
	JAILDESC_LOCK(desc);
	rc = vaccess(VREG, desc->jd_mode, desc->jd_uid, desc->jd_gid, VADMIN,
	    active_cred);
	if (rc == 0)
		desc->jd_mode = S_IFREG | (mode & ALLPERMS);
	JAILDESC_UNLOCK(desc);
	return (rc);
}
/*
 * Chown handler for jail descriptors.
 *
 * A uid or gid of -1 leaves that field as it is.  PRIV_VFS_CHOWN is
 * checked against active_cred when the owner changes to something
 * other than the caller's uid, or when the group changes to one the
 * caller is not a member of.  Returns 0 after updating the descriptor,
 * or the error from that privilege check.
 */
static int
jaildesc_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
    struct thread *td)
{
	struct jaildesc *desc;
	uid_t new_uid;
	gid_t new_gid;
	bool needs_priv;
	int rc;

	desc = fp->f_data;
	JAILDESC_LOCK(desc);
	new_uid = (uid == (uid_t)-1) ? desc->jd_uid : uid;
	new_gid = (gid == (gid_t)-1) ? desc->jd_gid : gid;
	needs_priv =
	    (new_uid != desc->jd_uid && new_uid != active_cred->cr_uid) ||
	    (new_gid != desc->jd_gid && !groupmember(new_gid, active_cred));
	rc = needs_priv ? priv_check_cred(active_cred, PRIV_VFS_CHOWN) : 0;
	if (rc == 0) {
		desc->jd_uid = new_uid;
		desc->jd_gid = new_gid;
	}
	JAILDESC_UNLOCK(desc);
	return (rc);
}
/*
* kinfo handler for jail descriptors. No information is filled in;
* every call fails with EINVAL and leaves kif untouched.
*/
static int
jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
struct filedesc *fdp)
{
return (EINVAL);
}
static int
jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
{
struct jaildesc *jd1, *jd2;
int jid1, jid2;
if (fp2->f_type != DTYPE_JAILDESC)
return (3);
jd1 = fp1->f_data;
JAILDESC_LOCK(jd1);
jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0;
JAILDESC_UNLOCK(jd1);
jd2 = fp2->f_data;
JAILDESC_LOCK(jd2);
jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0;
JAILDESC_UNLOCK(jd2);
return (kcmp_cmp(jid1, jid2));
}

View file

@ -602,4 +602,6 @@ const char *syscallnames[] = {
"inotify_rm_watch", /* 594 = inotify_rm_watch */
"getgroups", /* 595 = getgroups */
"setgroups", /* 596 = setgroups */
"jail_attach_jd", /* 597 = jail_attach_jd */
"jail_remove_jd", /* 598 = jail_remove_jd */
};

View file

@ -3383,5 +3383,15 @@
_In_reads_(gidsetsize) const gid_t *gidset
);
}
597 AUE_JAIL_ATTACH STD {
int jail_attach_jd(
int fd
);
}
598 AUE_JAIL_REMOVE STD {
int jail_remove_jd(
int fd
);
}
; vim: syntax=off

View file

@ -3500,6 +3500,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 2;
break;
}
/* jail_attach_jd */
case 597: {
struct jail_attach_jd_args *p = params;
iarg[a++] = p->fd; /* int */
*n_args = 1;
break;
}
/* jail_remove_jd */
case 598: {
struct jail_remove_jd_args *p = params;
iarg[a++] = p->fd; /* int */
*n_args = 1;
break;
}
default:
*n_args = 0;
break;
@ -9367,6 +9381,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
/* jail_attach_jd */
case 597:
switch (ndx) {
case 0:
p = "int";
break;
default:
break;
};
break;
/* jail_remove_jd */
case 598:
switch (ndx) {
case 0:
p = "int";
break;
default:
break;
};
break;
default:
break;
};
@ -11365,6 +11399,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
/* jail_attach_jd */
case 597:
if (ndx == 0 || ndx == 1)
p = "int";
break;
/* jail_remove_jd */
case 598:
if (ndx == 0 || ndx == 1)
p = "int";
break;
default:
break;
};

View file

@ -72,6 +72,7 @@ struct nameidata;
#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_TIMERFD 14 /* timerfd */
#define DTYPE_INOTIFY 15 /* inotify descriptor */
#define DTYPE_JAILDESC 16 /* jail descriptor */
#ifdef _KERNEL

View file

@ -99,8 +99,12 @@ enum prison_state {
#define JAIL_UPDATE 0x02 /* Update parameters of existing jail */
#define JAIL_ATTACH 0x04 /* Attach to jail upon creation */
#define JAIL_DYING 0x08 /* Allow getting a dying jail */
#define JAIL_SET_MASK 0x0f /* JAIL_DYING is deprecated/ignored here */
#define JAIL_GET_MASK 0x08
#define JAIL_USE_DESC 0x10 /* Get/set jail in descriptor */
#define JAIL_AT_DESC 0x20 /* Find/add jail under descriptor */
#define JAIL_GET_DESC 0x40 /* Return a new jail descriptor */
#define JAIL_OWN_DESC 0x80 /* Return a new owning jail descriptor */
#define JAIL_SET_MASK 0xff /* JAIL_DYING is deprecated/ignored here */
#define JAIL_GET_MASK 0xf8
#define JAIL_SYS_DISABLE 0
#define JAIL_SYS_NEW 1
@ -115,7 +119,9 @@ int jail(struct jail *);
int jail_set(struct iovec *, unsigned int, int);
int jail_get(struct iovec *, unsigned int, int);
int jail_attach(int);
int jail_attach_jd(int);
int jail_remove(int);
int jail_remove_jd(int);
__END_DECLS
#else /* _KERNEL */
@ -144,6 +150,7 @@ MALLOC_DECLARE(M_PRISON);
#define JAIL_META_PRIVATE "meta"
#define JAIL_META_SHARED "env"
struct jaildesc;
struct knlist;
struct racct;
struct prison_racct;
@ -191,7 +198,8 @@ struct prison {
struct prison_ip *pr_addrs[PR_FAMILY_MAX]; /* (p,n) IPs of jail */
struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
struct knlist *pr_klist; /* (m) attached knotes */
void *pr_sparep[2];
LIST_HEAD(, jaildesc) pr_descs; /* (a) attached descriptors */
void *pr_sparep;
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
@ -466,6 +474,7 @@ void prison_proc_free(struct prison *);
void prison_proc_link(struct prison *, struct proc *);
void prison_proc_unlink(struct prison *, struct proc *);
void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *);
void prison_remove(struct prison *);
void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
bool prison_ischild(struct prison *, struct prison *);
bool prison_isalive(const struct prison *);

85
sys/sys/jaildesc.h Normal file
View file

@ -0,0 +1,85 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 James Gritton.
* All rights reserved.
*
* This software was developed at the University of Cambridge Computer
* Laboratory with support from a grant from Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _SYS_JAILDESC_H_
#define _SYS_JAILDESC_H_
#ifdef _KERNEL
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_types.h>
struct prison;
/*-
* struct jaildesc describes a jail descriptor, which points to a struct
* prison. struct prison in turn has a linked list of struct jaildesc.
*
* Locking key:
* (c) set on creation, remains unchanged
* (d) jd_lock
* (p) jd_prison->pr_mtx
*/
struct jaildesc {
LIST_ENTRY(jaildesc) jd_list; /* (d,p) this prison's descs */
struct prison *jd_prison; /* (d) the prison */
struct mtx jd_lock; /* the lock behind the (d) fields */
uid_t jd_uid; /* (d) nominal file owner */
gid_t jd_gid; /* (d) nominal file group */
mode_t jd_mode; /* (d) descriptor permissions */
unsigned jd_flags; /* (d) JDF_* flags */
};
/*
* Locking macros for the jaildesc. jd_lock is a default (MTX_DEF)
* mutex named "jaildesc"; each macro takes a pointer to the jaildesc.
*/
#define JAILDESC_LOCK_DESTROY(jd) mtx_destroy(&(jd)->jd_lock)
#define JAILDESC_LOCK_INIT(jd) mtx_init(&(jd)->jd_lock, "jaildesc", \
NULL, MTX_DEF)
#define JAILDESC_LOCK(jd) mtx_lock(&(jd)->jd_lock)
#define JAILDESC_UNLOCK(jd) mtx_unlock(&(jd)->jd_lock)
/*
* Flags for the jd_flags field
*/
#define JDF_REMOVED 0x00000002 /* jail was removed */
/*
 * Kernel interface to jail descriptors.  The forward declarations keep
 * these prototypes valid even when the headers that define the
 * structures have not been included first.
 */
struct file;
struct thread;
struct ucred;

int jaildesc_find(struct thread *td, int fd, struct jaildesc **jdp,
    struct prison **prp, struct ucred **ucredp);
int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning);
void jaildesc_set_prison(struct file *fp, struct prison *pr);
void jaildesc_prison_cleanup(struct prison *pr);
#endif /* _KERNEL */
#endif /* !_SYS_JAILDESC_H_ */

View file

@ -535,4 +535,6 @@
#define SYS_inotify_rm_watch 594
#define SYS_getgroups 595
#define SYS_setgroups 596
#define SYS_MAXSYSCALL 597
#define SYS_jail_attach_jd 597
#define SYS_jail_remove_jd 598
#define SYS_MAXSYSCALL 599

View file

@ -438,4 +438,6 @@ MIASM = \
inotify_add_watch_at.o \
inotify_rm_watch.o \
getgroups.o \
setgroups.o
setgroups.o \
jail_attach_jd.o \
jail_remove_jd.o

View file

@ -1901,6 +1901,12 @@ struct setgroups_args {
char gidsetsize_l_[PADL_(int)]; int gidsetsize; char gidsetsize_r_[PADR_(int)];
char gidset_l_[PADL_(const gid_t *)]; const gid_t * gidset; char gidset_r_[PADR_(const gid_t *)];
};
struct jail_attach_jd_args {
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
};
struct jail_remove_jd_args {
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
};
int sys__exit(struct thread *, struct _exit_args *);
int sys_fork(struct thread *, struct fork_args *);
int sys_read(struct thread *, struct read_args *);
@ -2305,6 +2311,8 @@ int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *
int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *);
int sys_getgroups(struct thread *, struct getgroups_args *);
int sys_setgroups(struct thread *, struct setgroups_args *);
int sys_jail_attach_jd(struct thread *, struct jail_attach_jd_args *);
int sys_jail_remove_jd(struct thread *, struct jail_remove_jd_args *);
#ifdef COMPAT_43
@ -3301,6 +3309,8 @@ int freebsd14_setgroups(struct thread *, struct freebsd14_setgroups_args *);
#define SYS_AUE_inotify_rm_watch AUE_INOTIFY
#define SYS_AUE_getgroups AUE_GETGROUPS
#define SYS_AUE_setgroups AUE_SETGROUPS
#define SYS_AUE_jail_attach_jd AUE_JAIL_ATTACH
#define SYS_AUE_jail_remove_jd AUE_JAIL_REMOVE
#undef PAD_
#undef PADL_

View file

@ -266,6 +266,7 @@ struct user {
#define KF_TYPE_EVENTFD 13
#define KF_TYPE_TIMERFD 14
#define KF_TYPE_INOTIFY 15
#define KF_TYPE_JAILDESC 16
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@ -452,6 +453,9 @@ struct kinfo_file {
uint32_t kf_timerfd_flags;
uint64_t kf_timerfd_addr;
} kf_timerfd;
struct {
int32_t kf_jid;
} kf_jail;
struct {
uint64_t kf_kqueue_addr;
int32_t kf_kqueue_count;