318 lines
7.5 KiB
C
318 lines
7.5 KiB
C
/* $OpenBSD: sys_futex.c,v 1.22 2023/08/14 07:42:34 miod Exp $ */
|
|
|
|
/*
|
|
* Copyright (c) 2016-2017 Martin Pieuchot
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
* copyright notice and this permission notice appear in all copies.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/syscallargs.h>
|
|
#include <sys/pool.h>
|
|
#include <sys/time.h>
|
|
#include <sys/rwlock.h>
|
|
#include <sys/futex.h>
|
|
|
|
#ifdef KTRACE
|
|
#include <sys/ktrace.h>
|
|
#endif
|
|
|
|
#include <uvm/uvm.h>
|
|
|
|
/*
|
|
* Kernel representation of a futex.
|
|
*/
|
|
struct futex {
|
|
LIST_ENTRY(futex) ft_list; /* list of all futexes */
|
|
TAILQ_HEAD(, proc) ft_threads; /* sleeping queue */
|
|
struct uvm_object *ft_obj; /* UVM object */
|
|
struct vm_amap *ft_amap; /* UVM amap */
|
|
voff_t ft_off; /* UVM offset */
|
|
unsigned int ft_refcnt; /* # of references */
|
|
};
|
|
|
|
/* Syscall helpers. */
|
|
int futex_wait(uint32_t *, uint32_t, const struct timespec *, int);
|
|
int futex_wake(uint32_t *, uint32_t, int);
|
|
int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int);
|
|
|
|
/* Flags for futex_get(). */
|
|
#define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */
|
|
#define FT_PRIVATE 0x2 /* Futex is process-private. */
|
|
|
|
struct futex *futex_get(uint32_t *, int);
|
|
void futex_put(struct futex *);
|
|
|
|
/*
|
|
* The global futex lock serializes futex(2) calls so that no wakeup
|
|
* event is lost, and protects all futex lists and futex states.
|
|
*/
|
|
struct rwlock ftlock = RWLOCK_INITIALIZER("futex");
|
|
static struct futex_list ftlist_shared =
|
|
LIST_HEAD_INITIALIZER(ftlist_shared);
|
|
struct pool ftpool;
|
|
|
|
|
|
void
|
|
futex_init(void)
|
|
{
|
|
pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE,
|
|
PR_WAITOK | PR_RWLOCK, "futexpl", NULL);
|
|
}
|
|
|
|
int
|
|
sys_futex(struct proc *p, void *v, register_t *retval)
|
|
{
|
|
struct sys_futex_args /* {
|
|
syscallarg(uint32_t *) f;
|
|
syscallarg(int) op;
|
|
syscallarg(inr) val;
|
|
syscallarg(const struct timespec *) timeout;
|
|
syscallarg(uint32_t *) g;
|
|
} */ *uap = v;
|
|
uint32_t *uaddr = SCARG(uap, f);
|
|
int op = SCARG(uap, op);
|
|
uint32_t val = SCARG(uap, val);
|
|
const struct timespec *timeout = SCARG(uap, timeout);
|
|
void *g = SCARG(uap, g);
|
|
int flags = 0;
|
|
int error = 0;
|
|
|
|
if (op & FUTEX_PRIVATE_FLAG)
|
|
flags |= FT_PRIVATE;
|
|
|
|
rw_enter_write(&ftlock);
|
|
switch (op) {
|
|
case FUTEX_WAIT:
|
|
case FUTEX_WAIT_PRIVATE:
|
|
error = futex_wait(uaddr, val, timeout, flags);
|
|
break;
|
|
case FUTEX_WAKE:
|
|
case FUTEX_WAKE_PRIVATE:
|
|
*retval = futex_wake(uaddr, val, flags);
|
|
break;
|
|
case FUTEX_REQUEUE:
|
|
case FUTEX_REQUEUE_PRIVATE:
|
|
*retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags);
|
|
break;
|
|
default:
|
|
error = ENOSYS;
|
|
break;
|
|
}
|
|
rw_exit_write(&ftlock);
|
|
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Return an existing futex matching userspace address ``uaddr''.
|
|
*
|
|
* If such futex does not exist and FT_CREATE is given, create it.
|
|
*/
|
|
struct futex *
|
|
futex_get(uint32_t *uaddr, int flags)
|
|
{
|
|
struct proc *p = curproc;
|
|
vm_map_t map = &p->p_vmspace->vm_map;
|
|
vm_map_entry_t entry;
|
|
struct uvm_object *obj = NULL;
|
|
struct vm_amap *amap = NULL;
|
|
voff_t off = (vaddr_t)uaddr;
|
|
struct futex *f;
|
|
struct futex_list *ftlist = &p->p_p->ps_ftlist;
|
|
|
|
rw_assert_wrlock(&ftlock);
|
|
|
|
if (!(flags & FT_PRIVATE)) {
|
|
vm_map_lock_read(map);
|
|
if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) &&
|
|
entry->inheritance == MAP_INHERIT_SHARE) {
|
|
if (UVM_ET_ISOBJ(entry)) {
|
|
ftlist = &ftlist_shared;
|
|
obj = entry->object.uvm_obj;
|
|
off = entry->offset +
|
|
((vaddr_t)uaddr - entry->start);
|
|
} else if (entry->aref.ar_amap) {
|
|
ftlist = &ftlist_shared;
|
|
amap = entry->aref.ar_amap;
|
|
off = ptoa(entry->aref.ar_pageoff) +
|
|
((vaddr_t)uaddr - entry->start);
|
|
}
|
|
}
|
|
vm_map_unlock_read(map);
|
|
}
|
|
|
|
LIST_FOREACH(f, ftlist, ft_list) {
|
|
if (f->ft_obj == obj && f->ft_amap == amap &&
|
|
f->ft_off == off) {
|
|
f->ft_refcnt++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ((f == NULL) && (flags & FT_CREATE)) {
|
|
/*
|
|
* We rely on the rwlock to ensure that no other thread
|
|
* create the same futex.
|
|
*/
|
|
f = pool_get(&ftpool, PR_WAITOK);
|
|
TAILQ_INIT(&f->ft_threads);
|
|
f->ft_obj = obj;
|
|
f->ft_amap = amap;
|
|
f->ft_off = off;
|
|
f->ft_refcnt = 1;
|
|
LIST_INSERT_HEAD(ftlist, f, ft_list);
|
|
}
|
|
|
|
return f;
|
|
}
|
|
|
|
/*
|
|
* Release a given futex.
|
|
*/
|
|
void
|
|
futex_put(struct futex *f)
|
|
{
|
|
rw_assert_wrlock(&ftlock);
|
|
|
|
KASSERT(f->ft_refcnt > 0);
|
|
|
|
--f->ft_refcnt;
|
|
if (f->ft_refcnt == 0) {
|
|
KASSERT(TAILQ_EMPTY(&f->ft_threads));
|
|
LIST_REMOVE(f, ft_list);
|
|
pool_put(&ftpool, f);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Put the current thread on the sleep queue of the futex at address
|
|
* ``uaddr''. Let it sleep for the specified ``timeout'' time, or
|
|
* indefinitely if the argument is NULL.
|
|
*/
|
|
int
|
|
futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout,
|
|
int flags)
|
|
{
|
|
struct proc *p = curproc;
|
|
struct futex *f;
|
|
uint64_t nsecs = INFSLP;
|
|
uint32_t cval;
|
|
int error;
|
|
|
|
/*
|
|
* After reading the value a race is still possible but
|
|
* we deal with it by serializing all futex syscalls.
|
|
*/
|
|
rw_assert_wrlock(&ftlock);
|
|
|
|
/*
|
|
* Read user space futex value
|
|
*/
|
|
if ((error = copyin32(uaddr, &cval)))
|
|
return error;
|
|
|
|
/* If the value changed, stop here. */
|
|
if (cval != val)
|
|
return EAGAIN;
|
|
|
|
if (timeout != NULL) {
|
|
struct timespec ts;
|
|
|
|
if ((error = copyin(timeout, &ts, sizeof(ts))))
|
|
return error;
|
|
#ifdef KTRACE
|
|
if (KTRPOINT(p, KTR_STRUCT))
|
|
ktrreltimespec(p, &ts);
|
|
#endif
|
|
if (ts.tv_sec < 0 || !timespecisvalid(&ts))
|
|
return EINVAL;
|
|
nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP));
|
|
}
|
|
|
|
f = futex_get(uaddr, flags | FT_CREATE);
|
|
TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
|
|
p->p_futex = f;
|
|
|
|
error = rwsleep_nsec(p, &ftlock, PWAIT|PCATCH, "fsleep", nsecs);
|
|
if (error == ERESTART)
|
|
error = ECANCELED;
|
|
else if (error == EWOULDBLOCK) {
|
|
/* A race occurred between a wakeup and a timeout. */
|
|
if (p->p_futex == NULL)
|
|
error = 0;
|
|
else
|
|
error = ETIMEDOUT;
|
|
}
|
|
|
|
/* Remove ourself if we haven't been awaken. */
|
|
if ((f = p->p_futex) != NULL) {
|
|
p->p_futex = NULL;
|
|
TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
|
|
futex_put(f);
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Wakeup at most ``n'' sibling threads sleeping on a futex at address
|
|
* ``uaddr'' and requeue at most ``m'' sibling threads on a futex at
|
|
* address ``uaddr2''.
|
|
*/
|
|
int
|
|
futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m,
|
|
int flags)
|
|
{
|
|
struct futex *f, *g;
|
|
struct proc *p;
|
|
uint32_t count = 0;
|
|
|
|
rw_assert_wrlock(&ftlock);
|
|
|
|
f = futex_get(uaddr, flags);
|
|
if (f == NULL)
|
|
return 0;
|
|
|
|
while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) {
|
|
p->p_futex = NULL;
|
|
TAILQ_REMOVE(&f->ft_threads, p, p_fut_link);
|
|
futex_put(f);
|
|
|
|
if (count < n) {
|
|
wakeup_one(p);
|
|
} else if (uaddr2 != NULL) {
|
|
g = futex_get(uaddr2, FT_CREATE);
|
|
TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link);
|
|
p->p_futex = g;
|
|
}
|
|
count++;
|
|
}
|
|
|
|
futex_put(f);
|
|
|
|
return count;
|
|
}
|
|
|
|
/*
|
|
* Wakeup at most ``n'' sibling threads sleeping on a futex at address
|
|
* ``uaddr''.
|
|
*/
|
|
int
|
|
futex_wake(uint32_t *uaddr, uint32_t n, int flags)
|
|
{
|
|
return futex_requeue(uaddr, n, NULL, 0, flags);
|
|
}
|