libuvmem: usermode port of vmem(9)

The quantum cache is disabled because there is no UMA in userspace.

The intent is to use this for resource allocation in bhyve(8), to start with.
The addition of -luvmem to the bhyve link line was done to test the changes to share/mk.

Reviewed by:	bnovkov, markj
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D27220
This commit is contained in:
Konstantin Belousov 2020-12-21 19:41:34 +02:00
parent 305e33d4c6
commit 1ecf01065b
9 changed files with 194 additions and 23 deletions

View file

@ -107,6 +107,7 @@ SUBDIR= ${SUBDIR_BOOTSTRAP} \
libulog \
libutil \
libutil++ \
libuvmem \
${_libvgl} \
libwrap \
libxo \

17
lib/libuvmem/Makefile Normal file
View file

@ -0,0 +1,17 @@
# Build description for libuvmem, the usermode port of vmem(9).
PACKAGE= runtime
LIB= uvmem
# The only source file; it is located through the .PATH line below.
SRCS= subr_vmem.c
SHLIB_MAJOR= 1
# The library is linked against libpthread.
LIBADD+= pthread
# Silence clang's thread-safety analysis warnings for this library.
CFLAGS.clang+=-Wno-thread-safety-analysis
# Exported symbols are listed in Symbol.map next to this Makefile; the
# version definitions are taken from libc's Versions.def.
SYMBOL_MAPS= ${.CURDIR}/Symbol.map
VERSION_DEF= ${SRCTOP}/lib/libc/Versions.def
.include <src.opts.mk>
# Search sys/kern for sources, so subr_vmem.c is shared with the kernel tree.
.PATH: ${SRCTOP}/sys/kern
.include <bsd.lib.mk>

15
lib/libuvmem/Symbol.map Normal file
View file

@ -0,0 +1,15 @@
/*
 * Symbols exported by libuvmem, all placed in the FBSD_1.9 version node.
 */
FBSD_1.9 {
vmem_add;
vmem_alloc;
vmem_create;
vmem_destroy;
vmem_free;
vmem_init;
vmem_roundup_size;
vmem_set_import;
vmem_set_limit;
vmem_set_reclaim;
vmem_size;
vmem_xalloc;
vmem_xfree;
};

View file

@ -171,6 +171,7 @@ LIBUSB?= ${LIBDESTDIR}${LIBDIR_BASE}/libusb.a
LIBUSBHID?= ${LIBDESTDIR}${LIBDIR_BASE}/libusbhid.a
LIBUTIL?= ${LIBDESTDIR}${LIBDIR_BASE}/libutil.a
LIBUUTIL?= ${LIBDESTDIR}${LIBDIR_BASE}/libuutil.a
LIBUVMEM?= ${LIBDESTDIR}${LIBDIR_BASE}/libuvmem.a
LIBVERTO?= ${LIBDESTDIR}${LIBDIR_BASE}/libverto.a
LIBVGL?= ${LIBDESTDIR}${LIBDIR_BASE}/libvgl.a
LIBVMMAPI?= ${LIBDESTDIR}${LIBDIR_BASE}/libvmmapi.a

View file

@ -237,6 +237,7 @@ _LIBRARIES= \
usb \
usbhid \
util \
uvmem \
uutil \
verto \
vmmapi \
@ -491,6 +492,7 @@ _DP_fifolog= z
_DP_ipf= kvm
_DP_tpool= spl
_DP_uutil= avl spl
_DP_uvmem= pthread
_DP_zfs= md pthread rt umem util uutil m avl bsdxml crypto geom nvpair \
z zfs_core zutil
_DP_zfsbootenv= zfs nvpair
@ -767,6 +769,9 @@ LIBSYS_PIC?= ${LIBSYS_PICDIR}/libsys_pic.a
LIBSAMPLERATEDIR?= ${_LIB_OBJTOP}/lib/libsamplerate
LIBSAMPLERATE?= ${LIBSAMPLERATEDIR}/libsamplerate${PIE_SUFFIX}.a
LIBUVMEMDIR= ${OBJTOP}/lib/libuvmem
LIBUVMEM?= ${LIBUVMEMDIR}/libuvmem${PIE_SUFFIX}.a
# Define a directory for each library. This is useful for adding -L in when
# not using a --sysroot or for meta mode bootstrapping when there is no
# Makefile.depend. These are sorted by directory.

View file

@ -50,7 +50,9 @@ extern "C" {
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#ifndef __FreeBSD__
#include <sys/vmem.h>
#endif
#include <sys/misc.h>
#include <sys/taskq.h>
#include <sys/param.h>

View file

@ -41,6 +41,9 @@
*/
#include <sys/cdefs.h>
#ifdef _KERNEL
#include "opt_ddb.h"
#include <sys/param.h>
@ -75,6 +78,28 @@
#include <vm/vm_pagequeue.h>
#include <vm/uma_int.h>
#else /* _KERNEL */
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/hash.h>
#include <sys/vmem.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/*
 * Userspace stand-ins for kernel diagnostics.
 *
 * KASSERT(), MPASS() and WITNESS_WARN() expand to nothing, so their
 * arguments are never evaluated and no check is performed.
 *
 * panic() discards its arguments and becomes assert(0).  When NDEBUG is
 * defined assert() is a no-op, so code following a panic() call can be
 * reached and must still behave sensibly.
 */
#define KASSERT(a, b)
#define MPASS(a)
#define WITNESS_WARN(a, b, c)
#define panic(...) assert(0)
#endif /* _KERNEL */
#define VMEM_OPTORDER 5
#define VMEM_OPTVALUE (1 << VMEM_OPTORDER)
#define VMEM_MAXORDER \
@ -87,24 +112,40 @@
#define VMEM_FITMASK (M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
#define VMEM_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | \
M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
#define BT_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)
#define QC_NAME_MAX 16
/*
* Data structures private to vmem.
*/
/*
 * Flag masks applied to caller-supplied allocation flags.  The kernel and
 * userspace builds accept different sets of flags.
 */
#ifdef _KERNEL
#define VMEM_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | \
M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
#define BT_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)
MALLOC_DEFINE(M_VMEM, "vmem", "vmem internal structures");
#else /* _KERNEL */
/* bit-compat with kernel */
/*
 * These flags are defined as 0 in userspace, so OR-ing them into a flags
 * word or testing for them has no effect.
 */
#define M_ZERO 0
#define M_NOVM 0
#define M_USE_RESERVE 0
/* The userspace mask has no M_WAITOK bit. */
#define VMEM_FLAGS (M_NOWAIT | M_BESTFIT | M_FIRSTFIT | M_NEXTFIT)
/* No boundary-tag allocation flags exist in userspace. */
#define BT_FLAGS 0
#endif /* _KERNEL */
typedef struct vmem_btag bt_t;
TAILQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);
#ifdef _KERNEL
struct qcache {
uma_zone_t qc_cache;
vmem_t *qc_vmem;
@ -113,6 +154,7 @@ struct qcache {
};
typedef struct qcache qcache_t;
#define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool->pr_qcache))
#endif
#define VMEM_NAME_MAX 16
@ -132,8 +174,13 @@ struct vmem_btag {
/* vmem arena */
struct vmem {
#ifdef _KERNEL
struct mtx_padalign vm_lock;
struct cv vm_cv;
#else
pthread_mutex_t vm_lock;
pthread_cond_t vm_cv;
#endif
char vm_name[VMEM_NAME_MAX+1];
LIST_ENTRY(vmem) vm_alllist;
struct vmem_hashlist vm_hash0[VMEM_HASHSIZE_MIN];
@ -165,8 +212,10 @@ struct vmem {
/* Space exhaustion callback. */
vmem_reclaim_t *vm_reclaimfn;
#ifdef _KERNEL
/* quantum cache */
qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
#endif
};
#define BT_TYPE_SPAN 1 /* Allocated from importfn */
@ -178,6 +227,7 @@ struct vmem {
#define BT_END(bt) ((bt)->bt_start + (bt)->bt_size - 1)
#ifdef _KERNEL
#if defined(DIAGNOSTIC)
static int enable_vmem_check = 0;
SYSCTL_INT(_debug, OID_AUTO, vmem_check, CTLFLAG_RWTUN,
@ -190,10 +240,17 @@ static int vmem_periodic_interval;
static struct task vmem_periodic_wk;
static struct mtx_padalign __exclusive_cache_line vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
static uma_zone_t vmem_zone;
#else /* _KERNEL */
static pthread_mutex_t vmem_list_lock = PTHREAD_MUTEX_INITIALIZER;
#endif /* _KERNEL */
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
/* ---- misc */
#ifdef _KERNEL
#define VMEM_LIST_LOCK() mtx_lock(&vmem_list_lock)
#define VMEM_LIST_UNLOCK() mtx_unlock(&vmem_list_lock)
@ -207,6 +264,21 @@ static uma_zone_t vmem_zone;
#define VMEM_LOCK_INIT(vm, name) mtx_init(&vm->vm_lock, (name), NULL, MTX_DEF)
#define VMEM_LOCK_DESTROY(vm) mtx_destroy(&vm->vm_lock)
#define VMEM_ASSERT_LOCKED(vm) mtx_assert(&vm->vm_lock, MA_OWNED);
#else /* _KERNEL */
/*
 * Userspace locking primitives, implemented with pthread mutexes and
 * condition variables.  The "wchan" and "name" arguments are accepted but
 * ignored here.  Each macro evaluates to the return value of the
 * underlying pthread call, except VMEM_ASSERT_LOCKED().
 */
#define VMEM_LIST_LOCK() pthread_mutex_lock(&vmem_list_lock)
#define VMEM_LIST_UNLOCK() pthread_mutex_unlock(&vmem_list_lock)
#define VMEM_CONDVAR_INIT(vm, wchan) pthread_cond_init(&(vm)->vm_cv, NULL)
#define VMEM_CONDVAR_DESTROY(vm) pthread_cond_destroy(&(vm)->vm_cv)
#define VMEM_CONDVAR_WAIT(vm) pthread_cond_wait(&(vm)->vm_cv, &(vm)->vm_lock)
#define VMEM_CONDVAR_BROADCAST(vm) pthread_cond_broadcast(&(vm)->vm_cv)
#define VMEM_LOCK(vm) pthread_mutex_lock(&(vm)->vm_lock)
#define VMEM_UNLOCK(vm) pthread_mutex_unlock(&(vm)->vm_lock)
#define VMEM_LOCK_INIT(vm, name) pthread_mutex_init(&(vm)->vm_lock, NULL)
#define VMEM_LOCK_DESTROY(vm) pthread_mutex_destroy(&(vm)->vm_lock)
/*
 * Assert that the calling thread owns the arena lock.  A bare call to
 * pthread_mutex_isowned_np() only computes the answer and throws it away,
 * so the result is fed to assert() to make the check effective.  With
 * NDEBUG defined this compiles to nothing.
 */
#define VMEM_ASSERT_LOCKED(vm) assert(pthread_mutex_isowned_np(&(vm)->vm_lock))
#endif /* _KERNEL */
#define VMEM_ALIGNUP(addr, align) (-(-(addr) & -(align)))
@ -231,6 +303,7 @@ static uma_zone_t vmem_zone;
*/
#define BT_MAXFREE (BT_MAXALLOC * 8)
#ifdef _KERNEL
/* Allocator for boundary tags. */
static uma_zone_t vmem_bt_zone;
@ -245,7 +318,8 @@ vmem_t *transient_arena = &transient_arena_storage;
#ifdef DEBUG_MEMGUARD
static struct vmem memguard_arena_storage;
vmem_t *memguard_arena = &memguard_arena_storage;
#endif
#endif /* DEBUG_MEMGUARD */
#endif /* _KERNEL */
static bool
bt_isbusy(bt_t *bt)
@ -265,12 +339,13 @@ bt_isfree(bt_t *bt)
* at least the maximum possible tag allocations in the arena.
*/
static __noinline int
_bt_fill(vmem_t *vm, int flags)
_bt_fill(vmem_t *vm, int flags __unused)
{
bt_t *bt;
VMEM_ASSERT_LOCKED(vm);
#ifdef _KERNEL
/*
* Only allow the kernel arena and arenas derived from kernel arena to
* dip into reserve tags. They are where new tags come from.
@ -278,6 +353,7 @@ _bt_fill(vmem_t *vm, int flags)
flags &= BT_FLAGS;
if (vm != kernel_arena && vm->vm_arg != kernel_arena)
flags &= ~M_USE_RESERVE;
#endif
/*
* Loop until we meet the reserve. To minimize the lock shuffle
@ -286,12 +362,18 @@ _bt_fill(vmem_t *vm, int flags)
* holding a vmem lock.
*/
while (vm->vm_nfreetags < BT_MAXALLOC) {
#ifdef _KERNEL
bt = uma_zalloc(vmem_bt_zone,
(flags & M_USE_RESERVE) | M_NOWAIT | M_NOVM);
#else
bt = malloc(sizeof(struct vmem_btag));
#endif
if (bt == NULL) {
#ifdef _KERNEL
VMEM_UNLOCK(vm);
bt = uma_zalloc(vmem_bt_zone, flags);
VMEM_LOCK(vm);
#endif
if (bt == NULL)
break;
}
@ -351,7 +433,11 @@ bt_freetrim(vmem_t *vm, int freelimit)
VMEM_UNLOCK(vm);
while ((bt = LIST_FIRST(&freetags)) != NULL) {
LIST_REMOVE(bt, bt_freelist);
#ifdef _KERNEL
uma_zfree(vmem_bt_zone, bt);
#else
free(bt);
#endif
}
}
@ -538,6 +624,7 @@ bt_insfree(vmem_t *vm, bt_t *bt)
/* ---- vmem internal functions */
#ifdef _KERNEL
/*
* Import from the arena into the quantum cache in UMA.
*
@ -722,8 +809,6 @@ vmem_startup(void)
#endif
}
/* ---- rehash */
static int
vmem_rehash(vmem_t *vm, vmem_size_t newhashsize)
{
@ -821,6 +906,7 @@ vmem_start_callout(void *unused)
vmem_periodic_kick, NULL);
}
SYSINIT(vfs, SI_SUB_CONFIGURE, SI_ORDER_ANY, vmem_start_callout, NULL);
#endif /* _KERNEL */
static void
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type)
@ -876,10 +962,12 @@ vmem_destroy1(vmem_t *vm)
{
bt_t *bt;
#ifdef _KERNEL
/*
* Drain per-cpu quantum caches.
*/
qc_destroy(vm);
#endif
/*
* The vmem should now only contain empty segments.
@ -891,14 +979,23 @@ vmem_destroy1(vmem_t *vm)
while ((bt = TAILQ_FIRST(&vm->vm_seglist)) != NULL)
bt_remseg(vm, bt);
if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0)
if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0) {
#ifdef _KERNEL
free(vm->vm_hashlist, M_VMEM);
#else
free(vm->vm_hashlist);
#endif
}
bt_freetrim(vm, 0);
VMEM_CONDVAR_DESTROY(vm);
VMEM_LOCK_DESTROY(vm);
#ifdef _KERNEL
uma_zfree(vmem_zone, vm);
#else
free(vm);
#endif
}
static int
@ -1055,8 +1152,10 @@ vmem_try_fetch(vmem_t *vm, const vmem_size_t size, vmem_size_t align, int flags)
avail = vm->vm_size - vm->vm_inuse;
bt_save(vm);
VMEM_UNLOCK(vm);
#ifdef _KERNEL
if (vm->vm_qcache_max != 0)
qc_drain(vm);
#endif
if (vm->vm_reclaimfn != NULL)
vm->vm_reclaimfn(vm, flags);
VMEM_LOCK(vm);
@ -1236,8 +1335,14 @@ vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size,
{
vmem_size_t i;
#ifdef _KERNEL
MPASS(quantum > 0);
MPASS((quantum & (quantum - 1)) == 0);
#else
assert(quantum == 0);
assert(qcache_max == 0);
quantum = 1;
#endif
bzero(vm, sizeof(*vm));
@ -1252,7 +1357,11 @@ vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size,
vm->vm_size = 0;
vm->vm_limit = 0;
vm->vm_inuse = 0;
#ifdef _KERNEL
qc_init(vm, qcache_max);
#else
(void)qcache_max;
#endif
TAILQ_INIT(&vm->vm_seglist);
vm->vm_cursor.bt_start = vm->vm_cursor.bt_size = 0;
@ -1290,7 +1399,13 @@ vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
vmem_t *vm;
#ifdef _KERNEL
vm = uma_zalloc(vmem_zone, flags & (M_WAITOK|M_NOWAIT));
#else
assert(quantum == 0);
assert(qcache_max == 0);
vm = malloc(sizeof(vmem_t));
#endif
if (vm == NULL)
return (NULL);
if (vmem_init(vm, name, base, size, quantum, qcache_max,
@ -1302,7 +1417,6 @@ vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
void
vmem_destroy(vmem_t *vm)
{
VMEM_LIST_LOCK();
LIST_REMOVE(vm, vm_alllist);
VMEM_LIST_UNLOCK();
@ -1324,7 +1438,6 @@ int
vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
{
const int strat __unused = flags & VMEM_FITMASK;
qcache_t *qc;
flags &= VMEM_FLAGS;
MPASS(size > 0);
@ -1332,7 +1445,10 @@ vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
if ((flags & M_NOWAIT) == 0)
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc");
#ifdef _KERNEL
if (size <= vm->vm_qcache_max) {
qcache_t *qc;
/*
* Resource 0 cannot be cached, so avoid a blocking allocation
* in qc_import() and give the vmem_xalloc() call below a chance
@ -1344,6 +1460,7 @@ vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
if (__predict_true(*addrp != 0))
return (0);
}
#endif
return (vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
flags, addrp));
@ -1463,14 +1580,17 @@ out:
void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
qcache_t *qc;
MPASS(size > 0);
#ifdef _KERNEL
if (size <= vm->vm_qcache_max &&
__predict_true(addr >= VMEM_ADDR_QCACHE_MIN)) {
qcache_t *qc;
qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
uma_zfree(qc->qc_cache, (void *)addr);
} else
#endif
vmem_xfree(vm, addr, size);
}
@ -1564,11 +1684,13 @@ vmem_size(vmem_t *vm, int typemask)
return (0);
default:
panic("vmem_size");
return (0);
}
}
/* ---- debug */
#ifdef _KERNEL
#if defined(DDB) || defined(DIAGNOSTIC)
static void bt_dump(const bt_t *, int (*)(const char *, ...)
@ -1820,3 +1942,4 @@ vmem_check(vmem_t *vm)
}
#endif /* defined(DIAGNOSTIC) */
#endif /* _KERNEL */

View file

@ -33,8 +33,6 @@
#include <sys/types.h>
#ifdef _KERNEL
typedef struct vmem vmem_t;
typedef uintptr_t vmem_addr_t;
@ -44,10 +42,24 @@ typedef size_t vmem_size_t;
#define VMEM_ADDR_QCACHE_MIN 1
#define VMEM_ADDR_MAX (~(vmem_addr_t)0)
/* vmem_size typemask */
#define VMEM_ALLOC 0x01
#define VMEM_FREE 0x02
#define VMEM_MAXFREE 0x10
typedef int (vmem_import_t)(void *, vmem_size_t, int, vmem_addr_t *);
typedef void (vmem_release_t)(void *, vmem_addr_t, vmem_size_t);
typedef void (vmem_reclaim_t)(vmem_t *, int);
/*
 * Allocation and fit-strategy flags for non-kernel consumers of this header.
 * NOTE(review): presumably kernel builds get these names from the kernel
 * malloc header instead; confirm the values chosen here do not clash with
 * any other flag a userspace caller may pass.
 */
#ifndef _KERNEL
#define M_NOWAIT 0x0800 /* userspace hack */
#define M_FIRSTFIT 0x1000 /* only for vmem, fast fit */
#define M_BESTFIT 0x2000 /* only for vmem, low fragmentation */
#define M_NEXTFIT 0x8000 /* only for vmem, follow cursor */
#endif
__BEGIN_DECLS
/*
* Create a vmem:
* name - Name of the region
@ -134,11 +146,6 @@ void vmem_printall(const char *, int (*fn)(const char *, ...)
__printflike(1, 2));
void vmem_startup(void);
/* vmem_size typemask */
#define VMEM_ALLOC 0x01
#define VMEM_FREE 0x02
#define VMEM_MAXFREE 0x10
#endif /* _KERNEL */
__END_DECLS
#endif /* !_SYS_VMEM_H_ */

View file

@ -97,7 +97,7 @@ CFLAGS+=-I${.CURDIR} \
-I${.CURDIR}/../../contrib/lib9p \
-I${SRCTOP}/sys
LIBADD+= vmmapi md nv pthread z util sbuf cam 9p
LIBADD+= vmmapi md nv uvmem pthread z util sbuf cam 9p
.if ${MK_BHYVE_SNAPSHOT} != "no"
LIBADD+= ucl xo