From 026114488d093451cd9d8505dd1d80bc45f2a406 Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 20 May 2025 10:36:01 -0600 Subject: [PATCH 01/53] begin initial implementation of servicing pools --- sys/kern/files.kern | 1 + sys/kern/sys_aiosp.c | 315 +++++++++++++++++++++++++++++++++++++++++++ sys/sys/aio.h | 28 ++++ 3 files changed, 344 insertions(+) create mode 100644 sys/kern/sys_aiosp.c diff --git a/sys/kern/files.kern b/sys/kern/files.kern index 4c8967d61dcd6..e1482b27a9b2f 100644 --- a/sys/kern/files.kern +++ b/sys/kern/files.kern @@ -161,6 +161,7 @@ file kern/subr_vmem.c kern file kern/subr_workqueue.c kern file kern/subr_xcall.c kern file kern/sys_aio.c aio +file kern/sys_aiosp.c aio file kern/sys_descrip.c kern file kern/sys_epoll.c kern file kern/sys_eventfd.c kern diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c new file mode 100644 index 0000000000000..7e4b4a2d3b173 --- /dev/null +++ b/sys/kern/sys_aiosp.c @@ -0,0 +1,315 @@ +/* $NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp $ */ + +/* + * Copyright (c) 2007 Mindaugas Rasiukevicius + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Implementation of service pools to support asynchronous I/O + * DEFINED ... + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp $"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int aiost_create(struct aiosp *, struct aiost **); +static int aiost_terminate(struct aiost *); +static int aiost_configure(struct aiost *, struct aio_job *); +static int aiost_process_rw(struct aiost *); +static int aiost_process_sync(struct aiost *); +static void aiost_entry(void *); + +/* + * Distributes pending jobs to servicing threads. Allocates the requisite number + * of servicing threads, creates new threads if necessary, then assigns a single + * job to be completed by a servicing thread. + */ +int +aiosp_distribute_jobs(struct aiosp *sp) { + mutex_enter(&sp->mtx); + + /* + * Check to see if the number of pending jobs exceeds the number of free + * service threads. If it does then that means we need to create new + * threads. 
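+ * For example, with four jobs pending and a single thread on the
+ * freelist, three additional servicing threads must be created before
+ * any job can be assigned.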
+ */ + if(sp->jobs_pending > sp->nthreads_free) { + int nthreads_new = sp->jobs_pending - sp->nthreads_free; + + for(int i = 0; i < nthreads_new; i++) { + struct aiost *aiost; + + int error = aiost_create(sp, &aiost); + if(error) { + mutex_exit(&sp->mtx); + return error; + } + } + } + + /* + * Loop over all pending jobs and assign a thread from the freelist. + * Move from freelist to active. Configure service thread to work with + * respect to the job (and importantly the buffer associated with that + * job) + */ + struct aio_job *a_job; + TAILQ_FOREACH(a_job, &sp->jobs, list) { + struct aiost *aiost = TAILQ_LAST(&sp->freelist, aiost_list); + + TAILQ_REMOVE(&sp->freelist, aiost, list); + sp->nthreads_free--; + + TAILQ_INSERT_TAIL(&sp->active, aiost, list); + sp->nthreads_active++; + + int error = aiost_configure(aiost, a_job); + if(error) { + mutex_exit(&sp->mtx); + return error; + } + } + + mutex_exit(&sp->mtx); + + return 0; +} + +/* + * Initializes a servicing pool. + */ +int +aiosp_initialize(struct aiosp **ret) { + struct aiosp *sp; + + sp = kmem_zalloc(sizeof(struct aiosp), KM_SLEEP); + + mutex_init(&sp->mtx, MUTEX_DEFAULT, IPL_NONE); + TAILQ_INIT(&sp->freelist); + TAILQ_INIT(&sp->active); + TAILQ_INIT(&sp->jobs); + + return 0; +} + +/* + * Each process keeps track of all the service threads instantiated to service + * an asynchronous operation by the process. When a process is terminated we + * must also terminate all of its active and pending asynchronous operation. + */ +int +aiosp_destroy(struct aioproc *proc) { + struct aiosp *sp = proc->sp; + + mutex_enter(&sp->mtx); + mutex_enter(&proc->aio_mtx); + + /* + * Dance around locks. Iterate over every service thread associated with the + * process and terminate. + */ + struct aiost *st; + TAILQ_FOREACH(st, &proc->active_jobs, list) { + int error = aiost_terminate(st); + if(error) { + mutex_exit(&proc->aio_mtx); + mutex_exit(&sp->mtx); + + return error; + } + + kmem_free(st, sizeof(struct aiost)); + } + + kmem_free(sp, sizeof(struct aiosp)); + + mutex_exit(&proc->aio_mtx); + mutex_exit(&sp->mtx); + + return 0; +} + +/* + * Create and initialise a new servicing thread and append it to the freelist. + */ +static int +aiost_create(struct aiosp *sp, struct aiost **ret) { + struct proc *p = curlwp->l_proc; + struct aiost *st; + + st = kmem_zalloc(sizeof(struct aiost), KM_SLEEP); + + mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); + cv_init(&st->service_cv, "aioservice"); + + int error = kthread_create(PRI_KERNEL, 0, NULL, aiost_entry, + st, &st->lwp, "aio_%d_%d", p->p_pid, sp->nthreads_total); + if(error) { + return error; + } + + TAILQ_INSERT_TAIL(&sp->freelist, st, list); + sp->nthreads_free++; + sp->nthreads_total++; + + if(ret) { + *ret = st; + } + + return 0; +} + +/* + * Servicing thread entry point. Process the operation. Notify all those + * blocking on the completion of the operation. Send a signal if necessary. And + * then mark the current servicing thread as free. + */ +static void +aiost_entry(void *arg) { + struct aiost *st = arg; + struct aiosp *sp = st->aiosp; + + /* + * We want to handle abrupt process terminations effectively. 
The reason + * why we check st->exit twice, before and after we acquire the mutex is + * to account for the case where + */ + for(;;) { + if(st->exit) goto exit; + int error = cv_wait_sig(&st->service_cv, &st->mtx); + mutex_enter(&st->mtx); + if(error) goto next; + if(!st->exit) goto process; +exit: + /* + * Remove st from the list of active service threads, do NOT + * append to the freelist, dance around locks, exit kthread + */ + mutex_enter(&sp->mtx); + TAILQ_REMOVE(&sp->freelist, st, list); + sp->nthreads_free--; + mutex_exit(&sp->mtx); + mutex_exit(&st->mtx); + kthread_exit(0); +process: + // TODO figure a way communicate error codes to userspace + // effectively. + if(st->job->aio_op & (AIO_READ | AIO_WRITE)) { + error = aiost_process_rw(st); + if(error) { + mutex_exit(&st->mtx); + goto next; + } + } else if(st->job->aio_op & (AIO_READ | AIO_WRITE)) { + error = aiost_process_sync(st); + if(error) { + mutex_exit(&st->mtx); + goto next; + } + } else { + panic("aio_process: invalid operation code\n"); + } +next: + /* + * Remove st from list of active service threads, append to + * freelist, dance around locks, then iterate loop and block on + * st->service_cv + */ + mutex_exit(&st->mtx); + mutex_enter(&sp->mtx); + + TAILQ_REMOVE(&sp->freelist, st, list); + sp->nthreads_free--; + + TAILQ_INSERT_TAIL(&sp->active, st, list); + sp->nthreads_active++; + + mutex_exit(&sp->mtx); + } +} + +/* + * processes a read/write asynchronous operations + */ +static int +aiost_process_rw(struct aiost *aiost) { + return 0; +} + +/* + * processes a sync/dsync asynchronous operations + */ +static int +aiost_process_sync(struct aiost *aiost) { + return 0; +} + +/* + * Destroy a servicing thread + */ +static int +aiost_terminate(struct aiost *st) { + mutex_enter(&st->mtx); + + st->exit = 1; + cv_signal(&st->service_cv); + kthread_join(st->lwp); + + mutex_exit(&st->mtx); + mutex_destroy(&st->mtx); + + kmem_free(st, sizeof(struct aiost)); + + return 0; +} + +/* + * Configure a servicing thread to handle a specific job. Initialise operation + * and establish the 'shared' memory region. 
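+ * The intent is that the servicing thread can then operate on the
+ * caller's buffer directly instead of copying it through an
+ * intermediate kernel buffer.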
+ */ +static int +aiost_configure(struct aiost *aiost, struct aio_job *job) { + return 0; +} diff --git a/sys/sys/aio.h b/sys/sys/aio.h index cbf0959b02cee..a8dfff85a94f3 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -99,6 +99,29 @@ struct aio_job { struct lio_req *lio; }; +struct aiosp; +struct aiost { + TAILQ_ENTRY(aiost) list; + struct aiosp *aiosp; /* Servicing pool of this thread */ + kmutex_t mtx; /* Protects this structure */ + kcondvar_t service_cv; /* Signal to activate thread */ + struct aio_job *job; /* Jobs associated with the thread */ + struct lwp *lwp; /* Servicing thread LWP */ + int exit; /* Exit code */ +}; + +TAILQ_HEAD(aiost_list, aiost); +struct aiosp { + struct aiost_list freelist; /* Available service threads */ + int nthreads_free; /* Length of freelist */ + struct aiost_list active; /* Active servicing threads */ + int nthreads_active; /* length of active list */ + TAILQ_HEAD(, aio_job) jobs; /* Queue of pending jobs */ + int jobs_pending; /* Number of pending jobs */ + kmutex_t mtx; /* Protects structure */ + int nthreads_total; /* Number of total servicing threads */ +}; + /* LIO structure */ struct lio_req { u_int refcnt; /* Reference counter */ @@ -114,12 +137,17 @@ struct aioproc { unsigned int jobs_count; /* Count of the jobs */ TAILQ_HEAD(, aio_job) jobs_queue;/* Queue of the AIO jobs */ struct lwp *aio_worker; /* AIO worker thread */ + struct aiost_list active_jobs; /* List of active servicing threads */ + struct aiosp *sp; /* Servicing pool of the process */ }; extern u_int aio_listio_max; /* Prototypes */ void aio_print_jobs(void (*)(const char *, ...) __printflike(1, 2)); int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); +int aiosp_distribute_jobs(struct aiosp *); +int aiosp_initialize(struct aiosp **); +int aiosp_destroy(struct aioproc *); #endif /* _KERNEL */ From e695ae0af6c8f1ffacb711bc0b00e7b250cbe3d2 Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 20 May 2025 10:46:18 -0600 Subject: [PATCH 02/53] update license --- sys/kern/sys_aiosp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 7e4b4a2d3b173..8b58074e81056 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -1,7 +1,7 @@ /* $NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp $ */ /* - * Copyright (c) 2007 Mindaugas Rasiukevicius + * Copyright (c) 2025 The NetBSD Foundation, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,7 +28,6 @@ /* * Implementation of service pools to support asynchronous I/O - * DEFINED ... 
*/ #include From de747898a6e152d83e3f3a3b339d5414891086b3 Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 20 May 2025 10:49:39 -0600 Subject: [PATCH 03/53] more detailed comments --- sys/sys/aio.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/sys/aio.h b/sys/sys/aio.h index a8dfff85a94f3..fc1c5a0959b46 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -99,6 +99,7 @@ struct aio_job { struct lio_req *lio; }; +/* Structure for AIO servicing thread */ struct aiosp; struct aiost { TAILQ_ENTRY(aiost) list; @@ -110,6 +111,7 @@ struct aiost { int exit; /* Exit code */ }; +/* Structure for AIO servicing pool */ TAILQ_HEAD(aiost_list, aiost); struct aiosp { struct aiost_list freelist; /* Available service threads */ From fc634ed4a8be7d968bdd07437430485981a0b67a Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 20 May 2025 11:12:55 -0600 Subject: [PATCH 04/53] more precise termination routine --- sys/kern/sys_aiosp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 8b58074e81056..09f4941388e76 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -211,17 +211,17 @@ aiost_entry(void *arg) { struct aiosp *sp = st->aiosp; /* - * We want to handle abrupt process terminations effectively. The reason - * why we check st->exit twice, before and after we acquire the mutex is - * to account for the case where + * We want to handle abrupt process terminations effectively. We use + * st->exit to indicate that the thread must exit. When a thread is + * terminated aiost_terminate(st) unblocks those sleeping on + * st->service_cv */ for(;;) { - if(st->exit) goto exit; int error = cv_wait_sig(&st->service_cv, &st->mtx); mutex_enter(&st->mtx); if(error) goto next; if(!st->exit) goto process; -exit: + /* * Remove st from the list of active service threads, do NOT * append to the freelist, dance around locks, exit kthread @@ -286,7 +286,9 @@ aiost_process_sync(struct aiost *aiost) { } /* - * Destroy a servicing thread + * Destroy a servicing thread. Set st->exit high such that when we unblock the + * thread blocking on st->service_cv it will invoke an exit routine within + * aiost_entry. 
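+ * kthread_join() then waits for the servicing LWP to finish before the
+ * condition variable and the mutex are destroyed.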
*/ static int aiost_terminate(struct aiost *st) { @@ -296,9 +298,9 @@ aiost_terminate(struct aiost *st) { cv_signal(&st->service_cv); kthread_join(st->lwp); + cv_destroy(&st->service_cv); mutex_exit(&st->mtx); mutex_destroy(&st->mtx); - kmem_free(st, sizeof(struct aiost)); return 0; From 905b3f17c54eecb0457ec2f7b548786b1fe7e469 Mon Sep 17 00:00:00 2001 From: Ethan Date: Thu, 22 May 2025 13:35:12 -0600 Subject: [PATCH 05/53] implement shared memory between the servicing thread and userspace --- sys/kern/sys_aiosp.c | 59 +++++++++++++++++++++++++++++++++++++++++--- sys/sys/aio.h | 4 ++- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 09f4941388e76..2acafa39706ff 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -55,7 +55,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); -static int aiost_configure(struct aiost *, struct aio_job *); +static int aiost_configure(struct aiost *, struct aio_job *, vaddr_t *); static int aiost_process_rw(struct aiost *); static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); @@ -104,7 +104,7 @@ aiosp_distribute_jobs(struct aiosp *sp) { TAILQ_INSERT_TAIL(&sp->active, aiost, list); sp->nthreads_active++; - int error = aiost_configure(aiost, a_job); + int error = aiost_configure(aiost, a_job, &sp->kbuf); if(error) { mutex_exit(&sp->mtx); return error; @@ -311,6 +311,59 @@ aiost_terminate(struct aiost *st) { * and establish the 'shared' memory region. */ static int -aiost_configure(struct aiost *aiost, struct aio_job *job) { +aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) { + struct vmspace *vm = job->p->p_vmspace; + struct aiocb *aiocb = &job->aiocbp; + + // TODO handle sync and dsync + vm_prot_t protections = VM_PROT_NONE; + if(job->aio_op == AIO_READ) { + protections = VM_PROT_READ; + } else if(job->aio_op == AIO_WRITE) { + protections = VM_PROT_READ | VM_PROT_WRITE; + } else { + return 0; + } + + /* + * To account for the case where the memory is anonymously mapped and + * has not yet been fulfilled. 
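+	 * uvm_vslock() faults in and wires the pages backing the user
+	 * buffer, so each pmap_extract() below finds a resident page.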
+ */ + int error = uvm_vslock(vm, job->aiocb_uptr, aiocb->aio_nbytes, + protections); + if(error) { + return error; + } + + vaddr_t kva = uvm_km_alloc(kernel_map, aiocb->aio_nbytes, 0, + UVM_KMF_VAONLY); + if(!kva) { + uvm_vsunlock(vm, job->aiocb_uptr, aiocb->aio_nbytes); + return ENOMEM; + } + + /* + * Extract physical memory and map to the kernel + */ + for(vaddr_t uva = trunc_page((vaddr_t)aiocb->aio_buf); + uva < round_page((vaddr_t)aiocb->aio_buf + aiocb->aio_nbytes); + uva += PAGE_SIZE) { + paddr_t upa; + int ret = pmap_extract(vm_map_pmap(&vm->vm_map), uva, &upa); + if(!ret) { + uvm_km_free(kernel_map, kva, aiocb->aio_nbytes, + UVM_KMF_VAONLY); + uvm_vsunlock(vm, job->aiocb_uptr, + aiocb->aio_nbytes); + return EFAULT; + } + + pmap_kenter_pa(kva + (uva - trunc_page((vaddr_t)aiocb->aio_buf)), + upa, protections, 0); + } + + pmap_update(pmap_kernel()); + *kbuf = kva; + return 0; } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index fc1c5a0959b46..91129b90b9480 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -95,6 +95,7 @@ struct aio_job { int aio_op; /* Operation code */ struct aiocb aiocbp; /* AIO data structure */ void *aiocb_uptr; /* User-space pointer for identification of job */ + struct proc *p; /* Process that instantiated the job */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; }; @@ -108,7 +109,7 @@ struct aiost { kcondvar_t service_cv; /* Signal to activate thread */ struct aio_job *job; /* Jobs associated with the thread */ struct lwp *lwp; /* Servicing thread LWP */ - int exit; /* Exit code */ + int exit; /* Signifies an exit routine */ }; /* Structure for AIO servicing pool */ @@ -122,6 +123,7 @@ struct aiosp { int jobs_pending; /* Number of pending jobs */ kmutex_t mtx; /* Protects structure */ int nthreads_total; /* Number of total servicing threads */ + vaddr_t kbuf; /* Shared memory buffer */ }; /* LIO structure */ From 53a71deb099688b6bcd6a53f75262c5ceb116f30 Mon Sep 17 00:00:00 2001 From: Ethan Date: Sat, 24 May 2025 07:26:19 -0600 Subject: [PATCH 06/53] backport existing implementations of aio rw/sync --- sys/kern/sys_aiosp.c | 117 +++++++++++++++++++++++++++++++++++++++++-- sys/sys/aio.h | 2 +- 2 files changed, 114 insertions(+), 5 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 2acafa39706ff..71d90b1d86050 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -42,6 +42,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp #include #include #include +#include #include #include #include @@ -94,8 +95,8 @@ aiosp_distribute_jobs(struct aiosp *sp) { * respect to the job (and importantly the buffer associated with that * job) */ - struct aio_job *a_job; - TAILQ_FOREACH(a_job, &sp->jobs, list) { + struct aio_job *job; + TAILQ_FOREACH(job, &sp->jobs, list) { struct aiost *aiost = TAILQ_LAST(&sp->freelist, aiost_list); TAILQ_REMOVE(&sp->freelist, aiost, list); @@ -104,7 +105,7 @@ aiosp_distribute_jobs(struct aiosp *sp) { TAILQ_INSERT_TAIL(&sp->active, aiost, list); sp->nthreads_active++; - int error = aiost_configure(aiost, a_job, &sp->kbuf); + int error = aiost_configure(aiost, job, &aiost->kbuf); if(error) { mutex_exit(&sp->mtx); return error; @@ -274,6 +275,74 @@ aiost_entry(void *arg) { */ static int aiost_process_rw(struct aiost *aiost) { + struct aio_job *job = aiost->job; + struct aiocb *aiocbp = &job->aiocbp; + struct file *fp; + int fd = aiocbp->aio_fildes; + int error = 0; + + struct iovec aiov; + struct uio auio; + + if (aiocbp->aio_nbytes > SSIZE_MAX) { + error = SET_ERROR(EINVAL); + goto 
done; + } + + fp = fd_getfile(fd); + if (fp == NULL) { + error = SET_ERROR(EBADF); + goto done; + } + + aiov.iov_base = (void *)(uintptr_t)aiost->kbuf; + aiov.iov_len = aiocbp->aio_nbytes; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = aiocbp->aio_nbytes; + auio.uio_vmspace = NULL; + + if (job->aio_op & AIO_READ) { + /* + * Perform a Read operation + */ + KASSERT((job->aio_op & AIO_WRITE) == 0); + + if ((fp->f_flag & FREAD) == 0) { + fd_putfile(fd); + error = SET_ERROR(EBADF); + goto done; + } + auio.uio_rw = UIO_READ; + error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, + &auio, fp->f_cred, FOF_UPDATE_OFFSET); + } else { + /* + * Perform a Write operation + */ + KASSERT(job->aio_op & AIO_WRITE); + + if ((fp->f_flag & FWRITE) == 0) { + fd_putfile(fd); + error = SET_ERROR(EBADF); + goto done; + } + auio.uio_rw = UIO_WRITE; + error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, + &auio, fp->f_cred, FOF_UPDATE_OFFSET); + } + fd_putfile(fd); + + /* + * Store the result value + */ + job->aiocbp.aio_nbytes -= auio.uio_resid; + job->aiocbp._retval = (error == 0) ? + job->aiocbp.aio_nbytes : -1; +done: + job->aiocbp._errno = error; + job->aiocbp._state = JOB_DONE; + return 0; } @@ -282,6 +351,47 @@ aiost_process_rw(struct aiost *aiost) { */ static int aiost_process_sync(struct aiost *aiost) { + struct aio_job *job = aiost->job; + struct aiocb *aiocbp = &job->aiocbp; + struct file *fp; + int fd = aiocbp->aio_fildes; + int error = 0; + + /* + * Perform a file sync operation + */ + struct vnode *vp; + + if ((error = fd_getvnode(fd, &fp)) != 0) { + goto done; + } + + if ((fp->f_flag & FWRITE) == 0) { + fd_putfile(fd); + error = SET_ERROR(EBADF); + goto done; + } + + vp = fp->f_vnode; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + if (job->aio_op & AIO_DSYNC) { + error = VOP_FSYNC(vp, fp->f_cred, + FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); + } else if (job->aio_op & AIO_SYNC) { + error = VOP_FSYNC(vp, fp->f_cred, + FSYNC_WAIT, 0, 0); + } + VOP_UNLOCK(vp); + fd_putfile(fd); + + /* + * Store the result value + */ + job->aiocbp._retval = (error == 0) ? 
0 : -1; +done: + job->aiocbp._errno = error; + job->aiocbp._state = JOB_DONE; + return 0; } @@ -315,7 +425,6 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) { struct vmspace *vm = job->p->p_vmspace; struct aiocb *aiocb = &job->aiocbp; - // TODO handle sync and dsync vm_prot_t protections = VM_PROT_NONE; if(job->aio_op == AIO_READ) { protections = VM_PROT_READ; diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 91129b90b9480..f94e5d19ad8af 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -109,6 +109,7 @@ struct aiost { kcondvar_t service_cv; /* Signal to activate thread */ struct aio_job *job; /* Jobs associated with the thread */ struct lwp *lwp; /* Servicing thread LWP */ + vaddr_t kbuf; /* Shared memory buffer */ int exit; /* Signifies an exit routine */ }; @@ -123,7 +124,6 @@ struct aiosp { int jobs_pending; /* Number of pending jobs */ kmutex_t mtx; /* Protects structure */ int nthreads_total; /* Number of total servicing threads */ - vaddr_t kbuf; /* Shared memory buffer */ }; /* LIO structure */ From b795fb153e5d219db393e978844a4cfd175e02bb Mon Sep 17 00:00:00 2001 From: Ethan Date: Sat, 24 May 2025 07:59:27 -0600 Subject: [PATCH 07/53] aiosp_enqueue_job --- sys/kern/sys_aiosp.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 71d90b1d86050..433df81f6a675 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -134,6 +134,20 @@ aiosp_initialize(struct aiosp **ret) { return 0; } +/* + * Enqueue a job for processing by a servicing queue + */ +int aiosp_enqueue_job(struct aiosp *sp, struct aio_job *job) { + mutex_enter(&sp->mtx); + + TAILQ_INSERT_TAIL(&sp->jobs, job, list); + sp->jobs_pending++; + + mutex_exit(&sp->mtx); + + return 0; +} + /* * Each process keeps track of all the service threads instantiated to service * an asynchronous operation by the process. When a process is terminated we From eff6fb8bd44a09c5811fe6ba65145d696f6c4823 Mon Sep 17 00:00:00 2001 From: Ethan Date: Sat, 24 May 2025 09:18:44 -0600 Subject: [PATCH 08/53] code style --- sys/kern/sys_aiosp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 433df81f6a675..ae30594ada487 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -121,7 +121,8 @@ aiosp_distribute_jobs(struct aiosp *sp) { * Initializes a servicing pool. */ int -aiosp_initialize(struct aiosp **ret) { +aiosp_initialize(struct aiosp **ret) +{ struct aiosp *sp; sp = kmem_zalloc(sizeof(struct aiosp), KM_SLEEP); @@ -137,9 +138,11 @@ aiosp_initialize(struct aiosp **ret) { /* * Enqueue a job for processing by a servicing queue */ -int aiosp_enqueue_job(struct aiosp *sp, struct aio_job *job) { +int +aiosp_enqueue_job(struct aiosp *sp, struct aio_job *job) +{ mutex_enter(&sp->mtx); - + TAILQ_INSERT_TAIL(&sp->jobs, job, list); sp->jobs_pending++; @@ -154,7 +157,8 @@ int aiosp_enqueue_job(struct aiosp *sp, struct aio_job *job) { * must also terminate all of its active and pending asynchronous operation. */ int -aiosp_destroy(struct aioproc *proc) { +aiosp_destroy(struct aioproc *proc) +{ struct aiosp *sp = proc->sp; mutex_enter(&sp->mtx); @@ -189,7 +193,8 @@ aiosp_destroy(struct aioproc *proc) { * Create and initialise a new servicing thread and append it to the freelist. 
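 * The new thread parks in aiost_entry() until work arrives; it is
 * named "aio_<pid>_<n>" after the owning process and the pool's
 * running thread count.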
*/ static int -aiost_create(struct aiosp *sp, struct aiost **ret) { +aiost_create(struct aiosp *sp, struct aiost **ret) +{ struct proc *p = curlwp->l_proc; struct aiost *st; @@ -221,7 +226,8 @@ aiost_create(struct aiosp *sp, struct aiost **ret) { * then mark the current servicing thread as free. */ static void -aiost_entry(void *arg) { +aiost_entry(void *arg) +{ struct aiost *st = arg; struct aiosp *sp = st->aiosp; @@ -288,7 +294,8 @@ aiost_entry(void *arg) { * processes a read/write asynchronous operations */ static int -aiost_process_rw(struct aiost *aiost) { +aiost_process_rw(struct aiost *aiost) +{ struct aio_job *job = aiost->job; struct aiocb *aiocbp = &job->aiocbp; struct file *fp; From d8a167cb43f6976ea9bc900f746c007a975581d8 Mon Sep 17 00:00:00 2001 From: Ethan Date: Mon, 2 Jun 2025 07:50:05 -0600 Subject: [PATCH 09/53] code style and kernel option --- sys/conf/files | 1 + sys/kern/sys_aio.c | 27 ++++++++++++++++++--------- sys/kern/sys_aiosp.c | 43 ++++++++++++++++++++++--------------------- sys/sys/aio.h | 1 + 4 files changed, 42 insertions(+), 30 deletions(-) diff --git a/sys/conf/files b/sys/conf/files index 13c08412dffe0..da1e87df1fd03 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -51,6 +51,7 @@ defparam RTC_OFFSET defflag opt_pipe.h PIPE_SOCKETPAIR PIPE_NODIRECT defflag AIO +defflag AIOSP defflag MQUEUE defflag SEMAPHORE diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 92832b19ad388..354b2128918ea 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -484,6 +484,8 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) struct aio_job *a_job; struct aiocb aiocbp; struct sigevent *sig; + struct aiosp *sp; + struct aiost *aiost; int error; /* Non-accurate check for the limit */ @@ -520,15 +522,15 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) KASSERT(lio == NULL); } + aio = p->p_aio; /* * Look for already existing job. If found - the job is in-progress. * According to POSIX this is invalid, so return the error. */ - aio = p->p_aio; if (aio) { mutex_enter(&aio->aio_mtx); - TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { - if (a_job->aiocb_uptr != aiocb_uptr) + TAILQ_FOREACH(aiost, &aio->active_jobs, list) { + if (aiost->job->aiocb_uptr != aiocb_uptr) continue; mutex_exit(&aio->aio_mtx); return SET_ERROR(EINVAL); @@ -585,13 +587,20 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) return SET_ERROR(EAGAIN); } - TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list); - aio->jobs_count++; - if (lio) - lio->refcnt++; - cv_signal(&aio->aio_worker_cv); - + sp = aio->sp; mutex_exit(&aio->aio_mtx); + + error = aiosp_enqueue_job(sp, a_job); + if (error) { + mutex_exit(&aio->aio_mtx); + return error; + } + + error = aiosp_distribute_jobs(sp); + if (error) { + mutex_exit(&aio->aio_mtx); + return error; + } /* * One would handle the errors only with aio_error() function. 
diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index ae30594ada487..b447c985c85b6 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -54,6 +54,8 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp #include #include +MODULE(MODULE_CLASS_MISC, aiosp, NULL); + static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); static int aiost_configure(struct aiost *, struct aio_job *, vaddr_t *); @@ -78,7 +80,7 @@ aiosp_distribute_jobs(struct aiosp *sp) { if(sp->jobs_pending > sp->nthreads_free) { int nthreads_new = sp->jobs_pending - sp->nthreads_free; - for(int i = 0; i < nthreads_new; i++) { + for (int i = 0; i < nthreads_new; i++) { struct aiost *aiost; int error = aiost_create(sp, &aiost); @@ -125,7 +127,7 @@ aiosp_initialize(struct aiosp **ret) { struct aiosp *sp; - sp = kmem_zalloc(sizeof(struct aiosp), KM_SLEEP); + sp = kmem_zalloc(sizeof(*sp), KM_SLEEP); mutex_init(&sp->mtx, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&sp->freelist); @@ -171,17 +173,17 @@ aiosp_destroy(struct aioproc *proc) struct aiost *st; TAILQ_FOREACH(st, &proc->active_jobs, list) { int error = aiost_terminate(st); - if(error) { + if (error) { mutex_exit(&proc->aio_mtx); mutex_exit(&sp->mtx); return error; } - kmem_free(st, sizeof(struct aiost)); + kmem_free(st, sizeof(*st)); } - kmem_free(sp, sizeof(struct aiosp)); + kmem_free(sp, sizeof(*sp)); mutex_exit(&proc->aio_mtx); mutex_exit(&sp->mtx); @@ -198,14 +200,14 @@ aiost_create(struct aiosp *sp, struct aiost **ret) struct proc *p = curlwp->l_proc; struct aiost *st; - st = kmem_zalloc(sizeof(struct aiost), KM_SLEEP); + st = kmem_zalloc(sizeof(*st), KM_SLEEP); mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&st->service_cv, "aioservice"); int error = kthread_create(PRI_KERNEL, 0, NULL, aiost_entry, st, &st->lwp, "aio_%d_%d", p->p_pid, sp->nthreads_total); - if(error) { + if (error) { return error; } @@ -213,7 +215,7 @@ aiost_create(struct aiosp *sp, struct aiost **ret) sp->nthreads_free++; sp->nthreads_total++; - if(ret) { + if (ret) { *ret = st; } @@ -237,7 +239,7 @@ aiost_entry(void *arg) * terminated aiost_terminate(st) unblocks those sleeping on * st->service_cv */ - for(;;) { + for (;;) { int error = cv_wait_sig(&st->service_cv, &st->mtx); mutex_enter(&st->mtx); if(error) goto next; @@ -256,15 +258,15 @@ aiost_entry(void *arg) process: // TODO figure a way communicate error codes to userspace // effectively. - if(st->job->aio_op & (AIO_READ | AIO_WRITE)) { + if (st->job->aio_op & (AIO_READ | AIO_WRITE)) { error = aiost_process_rw(st); - if(error) { + if (error) { mutex_exit(&st->mtx); goto next; } - } else if(st->job->aio_op & (AIO_READ | AIO_WRITE)) { + } else if (st->job->aio_op & AIO_SYNC) { error = aiost_process_sync(st); - if(error) { + if (error) { mutex_exit(&st->mtx); goto next; } @@ -358,8 +360,7 @@ aiost_process_rw(struct aiost *aiost) * Store the result value */ job->aiocbp.aio_nbytes -= auio.uio_resid; - job->aiocbp._retval = (error == 0) ? - job->aiocbp.aio_nbytes : -1; + job->aiocbp._retval = (error == 0) ? 
job->aiocbp.aio_nbytes : -1; done: job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; @@ -432,7 +433,7 @@ aiost_terminate(struct aiost *st) { cv_destroy(&st->service_cv); mutex_exit(&st->mtx); mutex_destroy(&st->mtx); - kmem_free(st, sizeof(struct aiost)); + kmem_free(st, sizeof(*st)); return 0; } @@ -447,7 +448,7 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) { struct aiocb *aiocb = &job->aiocbp; vm_prot_t protections = VM_PROT_NONE; - if(job->aio_op == AIO_READ) { + if (job->aio_op == AIO_READ) { protections = VM_PROT_READ; } else if(job->aio_op == AIO_WRITE) { protections = VM_PROT_READ | VM_PROT_WRITE; @@ -461,13 +462,13 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) { */ int error = uvm_vslock(vm, job->aiocb_uptr, aiocb->aio_nbytes, protections); - if(error) { + if (error) { return error; } vaddr_t kva = uvm_km_alloc(kernel_map, aiocb->aio_nbytes, 0, UVM_KMF_VAONLY); - if(!kva) { + if (!kva) { uvm_vsunlock(vm, job->aiocb_uptr, aiocb->aio_nbytes); return ENOMEM; } @@ -475,12 +476,12 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) { /* * Extract physical memory and map to the kernel */ - for(vaddr_t uva = trunc_page((vaddr_t)aiocb->aio_buf); + for (vaddr_t uva = trunc_page((vaddr_t)aiocb->aio_buf); uva < round_page((vaddr_t)aiocb->aio_buf + aiocb->aio_nbytes); uva += PAGE_SIZE) { paddr_t upa; int ret = pmap_extract(vm_map_pmap(&vm->vm_map), uva, &upa); - if(!ret) { + if (!ret) { uvm_km_free(kernel_map, kva, aiocb->aio_nbytes, UVM_KMF_VAONLY); uvm_vsunlock(vm, job->aiocb_uptr, diff --git a/sys/sys/aio.h b/sys/sys/aio.h index f94e5d19ad8af..edea2ef9b9c99 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -150,6 +150,7 @@ extern u_int aio_listio_max; void aio_print_jobs(void (*)(const char *, ...) 
__printflike(1, 2)); int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); int aiosp_distribute_jobs(struct aiosp *); +int aiosp_enqueue_job(struct aiosp *, struct aio_job *); int aiosp_initialize(struct aiosp **); int aiosp_destroy(struct aioproc *); From c2878896288e2841f40a5f39293c6c2e4948dd0e Mon Sep 17 00:00:00 2001 From: Ethan Date: Mon, 2 Jun 2025 12:09:06 -0600 Subject: [PATCH 10/53] aiosp option --- sys/kern/files.kern | 2 +- sys/kern/sys_aio.c | 65 ++++++++++++++++++++++++++++++++++---------- sys/kern/sys_aiosp.c | 54 +++++++++++++++++++++++++++++++----- 3 files changed, 98 insertions(+), 23 deletions(-) diff --git a/sys/kern/files.kern b/sys/kern/files.kern index e1482b27a9b2f..ec5c2b578ceb0 100644 --- a/sys/kern/files.kern +++ b/sys/kern/files.kern @@ -161,7 +161,7 @@ file kern/subr_vmem.c kern file kern/subr_workqueue.c kern file kern/subr_xcall.c kern file kern/sys_aio.c aio -file kern/sys_aiosp.c aio +file kern/sys_aiosp.c aiosp file kern/sys_descrip.c kern file kern/sys_epoll.c kern file kern/sys_eventfd.c kern diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 354b2128918ea..b34cb6a125766 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -35,6 +35,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.50 2024/12/07 02:38:51 riastradh Exp $"); #ifdef _KERNEL_OPT +#include "opt_aiosp.h" #include "opt_ddb.h" #endif @@ -83,6 +84,7 @@ static void * aio_ehook; static void aio_worker(void *); static void aio_process(struct aio_job *); static void aio_sendsig(struct proc *, struct sigevent *); +static int aio_validate_conflicts(struct aioproc *, void *); static int aio_enqueue_job(int, void *, struct lio_req *); static void aio_exit(proc_t *, void *); @@ -473,6 +475,37 @@ aio_sendsig(struct proc *p, struct sigevent *sig) mutex_exit(&proc_lock); } +/* + * Ensure + */ +static int +aio_validate_conflicts(struct aioproc *aio, void *aiocb_uptr) +{ + mutex_enter(&aio->aio_mtx); + +#ifdef AIOSP + struct aiost *st; + TAILQ_FOREACH(st, &aio->active_jobs, list) { + if (st->job->aiocb_uptr != aiocb_uptr) + continue; + mutex_exit(&aio->aio_mtx); + return EINVAL; + } +#else + struct aio_job *a_job; + TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { + if (a_job->aiocb_uptr != aiocb_uptr) + continue; + mutex_exit(&aio->aio_mtx); + return EINVAL; + } +#endif + + mutex_exit(&aio->aio_mtx); + + return 0; +} + /* * Enqueue the job. */ @@ -484,8 +517,6 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) struct aio_job *a_job; struct aiocb aiocbp; struct sigevent *sig; - struct aiosp *sp; - struct aiost *aiost; int error; /* Non-accurate check for the limit */ @@ -522,20 +553,16 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) KASSERT(lio == NULL); } - aio = p->p_aio; /* * Look for already existing job. If found - the job is in-progress. * According to POSIX this is invalid, so return the error. 
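 * The lookup itself is performed by aio_validate_conflicts() above.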
*/ + aio = p->p_aio; if (aio) { - mutex_enter(&aio->aio_mtx); - TAILQ_FOREACH(aiost, &aio->active_jobs, list) { - if (aiost->job->aiocb_uptr != aiocb_uptr) - continue; - mutex_exit(&aio->aio_mtx); - return SET_ERROR(EINVAL); + error = aio_validate_conflicts(aio, aiocb_uptr); + if (error) { + return SET_ERROR(error); } - mutex_exit(&aio->aio_mtx); } /* @@ -587,20 +614,28 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) return SET_ERROR(EAGAIN); } - sp = aio->sp; - mutex_exit(&aio->aio_mtx); - +#ifdef AIOSP + struct aiosp *sp = aio->sp; + error = aiosp_enqueue_job(sp, a_job); if (error) { mutex_exit(&aio->aio_mtx); - return error; + return SET_ERROR(error); } error = aiosp_distribute_jobs(sp); if (error) { mutex_exit(&aio->aio_mtx); - return error; + return SET_ERROR(error); } +#else + TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list); + aio->jobs_count++; + if (lio) + lio->refcnt++; + cv_signal(&aio->aio_worker_cv); +#endif + mutex_exit(&aio->aio_mtx); /* * One would handle the errors only with aio_error() function. diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index b447c985c85b6..1cd892f3e00f7 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -63,6 +63,42 @@ static int aiost_process_rw(struct aiost *); static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); +/* + * Tear down all service pools + */ +static int +aiosp_fini(void) +{ + int error = 0; + return error; +} + +/* + * Initialize global service pool state + */ +static int +aiosp_init(void) +{ + int error = 0; + return error; +} + +/* + * Module interface + */ +static int +aiosp_modcmd(modcmd_t cmd, void *arg) +{ + switch (cmd) { + case MODULE_CMD_INIT: + return aiosp_init(); + case MODULE_CMD_FINI: + return aiosp_fini(); + default: + return SET_ERROR(ENOTTY); + } +} + /* * Distributes pending jobs to servicing threads. Allocates the requisite number * of servicing threads, creates new threads if necessary, then assigns a single @@ -77,14 +113,14 @@ aiosp_distribute_jobs(struct aiosp *sp) { * service threads. If it does then that means we need to create new * threads. */ - if(sp->jobs_pending > sp->nthreads_free) { + if (sp->jobs_pending > sp->nthreads_free) { int nthreads_new = sp->jobs_pending - sp->nthreads_free; for (int i = 0; i < nthreads_new; i++) { struct aiost *aiost; int error = aiost_create(sp, &aiost); - if(error) { + if (error) { mutex_exit(&sp->mtx); return error; } @@ -108,7 +144,7 @@ aiosp_distribute_jobs(struct aiosp *sp) { sp->nthreads_active++; int error = aiost_configure(aiost, job, &aiost->kbuf); - if(error) { + if (error) { mutex_exit(&sp->mtx); return error; } @@ -232,6 +268,7 @@ aiost_entry(void *arg) { struct aiost *st = arg; struct aiosp *sp = st->aiosp; + struct aio_job *job; /* * We want to handle abrupt process terminations effectively. We use @@ -256,15 +293,14 @@ aiost_entry(void *arg) mutex_exit(&st->mtx); kthread_exit(0); process: - // TODO figure a way communicate error codes to userspace - // effectively. - if (st->job->aio_op & (AIO_READ | AIO_WRITE)) { + job = st->job; + if (job->aio_op & (AIO_READ | AIO_WRITE)) { error = aiost_process_rw(st); if (error) { mutex_exit(&st->mtx); goto next; } - } else if (st->job->aio_op & AIO_SYNC) { + } else if (job->aio_op & AIO_SYNC) { error = aiost_process_sync(st); if (error) { mutex_exit(&st->mtx); @@ -273,6 +309,10 @@ aiost_entry(void *arg) } else { panic("aio_process: invalid operation code\n"); } + + // touch the job directly + + // TOUCH AIOCPUPR??? OR TOUCH JOB->AIOCBP??? 
next: /* * Remove st from list of active service threads, append to From a9af4308f88f0c5fab2746e5d1d6a038d86ddbe2 Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 3 Jun 2025 09:27:53 -0600 Subject: [PATCH 11/53] aiosp banks --- sys/kern/sys_aiosp.c | 132 +++++++++++++++++++++++++++++-------------- sys/sys/aio.h | 2 - 2 files changed, 90 insertions(+), 44 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 1cd892f3e00f7..888dd9bb00bff 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -56,12 +56,20 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp MODULE(MODULE_CLASS_MISC, aiosp, NULL); -static int aiost_create(struct aiosp *, struct aiost **); -static int aiost_terminate(struct aiost *); -static int aiost_configure(struct aiost *, struct aio_job *, vaddr_t *); -static int aiost_process_rw(struct aiost *); -static int aiost_process_sync(struct aiost *); -static void aiost_entry(void *); +static u_int aiosp_bank_max = NPRI_KTHREAD; +static struct aiosp **aiosp_bank; + +static int aiosp_initialize(struct aiosp **); +static int aiosp_destroy(struct aiosp *); + +static int aiost_create(struct aiosp *, struct aiost **); +static int aiost_terminate(struct aiost *); +static int aiost_configure(struct aiost *, struct aio_job *, vaddr_t *); +static int aiost_process_rw(struct aiost *); +static int aiost_process_sync(struct aiost *); +static void aiost_entry(void *); + +#define pri_aio_idx(pri) (pri) /* * Tear down all service pools @@ -69,8 +77,26 @@ static void aiost_entry(void *); static int aiosp_fini(void) { - int error = 0; - return error; + struct aiosp *aiosp; + int error; + + for (int i = 0; i < aiosp_bank_max; i++) { + aiosp = aiosp_bank[i]; + if (aiosp == NULL) { + continue; + } + + error = aiosp_destroy(aiosp); + if (error) { + return error; + } + + kmem_free(aiosp, sizeof(*aiosp)); + } + + kmem_free(aiosp_bank, sizeof(*aiosp_bank) * aiosp_bank_max); + + return 0; } /* @@ -79,8 +105,18 @@ aiosp_fini(void) static int aiosp_init(void) { - int error = 0; - return error; + struct aiosp **aiosp; + int error; + + aiosp_bank = kmem_zalloc(sizeof(*aiosp_bank) * aiosp_bank_max, KM_SLEEP); + aiosp = &aiosp_bank[pri_aio_idx(PRI_KTHREAD)]; + + error = aiosp_initialize(aiosp); + if (error) { + return error; + } + + return 0; } /* @@ -158,7 +194,7 @@ aiosp_distribute_jobs(struct aiosp *sp) { /* * Initializes a servicing pool. */ -int +static int aiosp_initialize(struct aiosp **ret) { struct aiosp *sp; @@ -174,15 +210,49 @@ aiosp_initialize(struct aiosp **ret) } /* - * Enqueue a job for processing by a servicing queue + * Each process keeps track of all the service threads instantiated to service + * an asynchronous operation by the process. When a process is terminated we + * must also terminate all of its active and pending asynchronous operation. 
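+ * Both the freelist and the active list are drained here before the
+ * pool itself is freed.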
+ * EXCEPTIONAL TROLLING FIX LATER */ -int -aiosp_enqueue_job(struct aiosp *sp, struct aio_job *job) +static int +aiosp_destroy(struct aiosp *sp) { + struct aiost *st; + int freelist = 1; + int error; + mutex_enter(&sp->mtx); - TAILQ_INSERT_TAIL(&sp->jobs, job, list); - sp->jobs_pending++; + goto begin; +process: + error = aiost_terminate(st); + if (error) { + mutex_exit(&sp->mtx); + return error; + } + + kmem_free(st, sizeof(*st)); + + if (freelist) { + goto freelist_next; + } else { + goto active_next; + } + +begin: + TAILQ_FOREACH(st, &sp->freelist, list) { + goto process; +freelist_next: + } + + freelist = 0; + TAILQ_FOREACH(st, &sp->active, list) { + goto process; +active_next: + } + + kmem_free(sp, sizeof(*sp)); mutex_exit(&sp->mtx); @@ -190,38 +260,16 @@ aiosp_enqueue_job(struct aiosp *sp, struct aio_job *job) } /* - * Each process keeps track of all the service threads instantiated to service - * an asynchronous operation by the process. When a process is terminated we - * must also terminate all of its active and pending asynchronous operation. + * Enqueue a job for processing by a servicing queue */ int -aiosp_destroy(struct aioproc *proc) +aiosp_enqueue_job(struct aiosp *sp, struct aio_job *job) { - struct aiosp *sp = proc->sp; - mutex_enter(&sp->mtx); - mutex_enter(&proc->aio_mtx); - - /* - * Dance around locks. Iterate over every service thread associated with the - * process and terminate. - */ - struct aiost *st; - TAILQ_FOREACH(st, &proc->active_jobs, list) { - int error = aiost_terminate(st); - if (error) { - mutex_exit(&proc->aio_mtx); - mutex_exit(&sp->mtx); - return error; - } - - kmem_free(st, sizeof(*st)); - } - - kmem_free(sp, sizeof(*sp)); + TAILQ_INSERT_TAIL(&sp->jobs, job, list); + sp->jobs_pending++; - mutex_exit(&proc->aio_mtx); mutex_exit(&sp->mtx); return 0; diff --git a/sys/sys/aio.h b/sys/sys/aio.h index edea2ef9b9c99..2feb35c733a66 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -151,8 +151,6 @@ void aio_print_jobs(void (*)(const char *, ...) __printflike(1, 2)); int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); int aiosp_distribute_jobs(struct aiosp *); int aiosp_enqueue_job(struct aiosp *, struct aio_job *); -int aiosp_initialize(struct aiosp **); -int aiosp_destroy(struct aioproc *); #endif /* _KERNEL */ From b966803beaa56c11b05a24895cbc9b4d56b8fe10 Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 3 Jun 2025 12:16:46 -0600 Subject: [PATCH 12/53] service pool priority --- sys/kern/sys_aio.c | 2 +- sys/kern/sys_aiosp.c | 30 +++++++++++++++++++----------- sys/sys/aio.h | 1 + 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index b34cb6a125766..ec22ff60285fc 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -476,7 +476,7 @@ aio_sendsig(struct proc *p, struct sigevent *sig) } /* - * Ensure + * The same job can be enqueued twice. 
So ensure that it does not exist */ static int aio_validate_conflicts(struct aioproc *aio, void *aiocb_uptr) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 888dd9bb00bff..fcaf229ea5f80 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -61,6 +61,7 @@ static struct aiosp **aiosp_bank; static int aiosp_initialize(struct aiosp **); static int aiosp_destroy(struct aiosp *); +static int aiosp_pri_idx(int); static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); @@ -69,13 +70,11 @@ static int aiost_process_rw(struct aiost *); static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); -#define pri_aio_idx(pri) (pri) - /* * Tear down all service pools */ static int -aiosp_fini(void) +aio_fini(void) { struct aiosp *aiosp; int error; @@ -103,13 +102,13 @@ aiosp_fini(void) * Initialize global service pool state */ static int -aiosp_init(void) +aio_init(void) { struct aiosp **aiosp; int error; aiosp_bank = kmem_zalloc(sizeof(*aiosp_bank) * aiosp_bank_max, KM_SLEEP); - aiosp = &aiosp_bank[pri_aio_idx(PRI_KTHREAD)]; + aiosp = &aiosp_bank[aiosp_pri_idx(PRI_KTHREAD)]; error = aiosp_initialize(aiosp); if (error) { @@ -127,9 +126,9 @@ aiosp_modcmd(modcmd_t cmd, void *arg) { switch (cmd) { case MODULE_CMD_INIT: - return aiosp_init(); + return aio_init(); case MODULE_CMD_FINI: - return aiosp_fini(); + return aio_fini(); default: return SET_ERROR(ENOTTY); } @@ -201,6 +200,7 @@ aiosp_initialize(struct aiosp **ret) sp = kmem_zalloc(sizeof(*sp), KM_SLEEP); + sp->priority = PRI_KERNEL; mutex_init(&sp->mtx, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&sp->freelist); TAILQ_INIT(&sp->active); @@ -209,6 +209,17 @@ aiosp_initialize(struct aiosp **ret) return 0; } +/* + * Convert a priority into an index into its associative service pool. + */ +static int aiosp_pri_idx(int pri) { + if(pri < PRI_KTHREAD) { + panic("aio_process: invalid priority for AIO"); + } + + return pri - PRI_KTHREAD; +} + /* * Each process keeps track of all the service threads instantiated to service * an asynchronous operation by the process. When a process is terminated we @@ -333,6 +344,7 @@ aiost_entry(void *arg) /* * Remove st from the list of active service threads, do NOT * append to the freelist, dance around locks, exit kthread + * TODO SIMPLIFY AND REMOVE LABELS */ mutex_enter(&sp->mtx); TAILQ_REMOVE(&sp->freelist, st, list); @@ -357,10 +369,6 @@ aiost_entry(void *arg) } else { panic("aio_process: invalid operation code\n"); } - - // touch the job directly - - // TOUCH AIOCPUPR??? OR TOUCH JOB->AIOCBP??? 
next: /* * Remove st from list of active service threads, append to diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 2feb35c733a66..baf2f801469fa 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -124,6 +124,7 @@ struct aiosp { int jobs_pending; /* Number of pending jobs */ kmutex_t mtx; /* Protects structure */ int nthreads_total; /* Number of total servicing threads */ + int priority; /* Thread priority of the pool */ }; /* LIO structure */ From 47da35630b60a2c48e8ee4b70a5284df93804b03 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sat, 7 Jun 2025 15:48:12 -0600 Subject: [PATCH 13/53] service banks major bug fixes and begin integration --- sys/kern/sys_aio.c | 7 +- sys/kern/sys_aiosp.c | 213 +++++++++++++++++++++++++++++++++---------- sys/sys/aio.h | 13 ++- 3 files changed, 180 insertions(+), 53 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index ec22ff60285fc..c67504dc21450 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -615,15 +615,16 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) } #ifdef AIOSP - struct aiosp *sp = aio->sp; + a_job->pri = PRI_KTHREAD; + a_job->p = curlwp->l_proc; - error = aiosp_enqueue_job(sp, a_job); + error = aiosp_enqueue_job(a_job); if (error) { mutex_exit(&aio->aio_mtx); return SET_ERROR(error); } - error = aiosp_distribute_jobs(sp); + error = aiosp_dispense_bank(); if (error) { mutex_exit(&aio->aio_mtx); return SET_ERROR(error); diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index fcaf229ea5f80..01eaecc980052 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -56,16 +56,19 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp MODULE(MODULE_CLASS_MISC, aiosp, NULL); -static u_int aiosp_bank_max = NPRI_KTHREAD; -static struct aiosp **aiosp_bank; +static kmutex_t aiospb_mtx; +static u_int aiospb_max = PRI_KTHREAD + NPRI_KTHREAD; +static struct aiosp **aiospb; -static int aiosp_initialize(struct aiosp **); +static int aiosp_initialize(struct aiosp *, pri_t); static int aiosp_destroy(struct aiosp *); -static int aiosp_pri_idx(int); +static int aiosp_retrieve_bank(pri_t, struct aiosp **); +static int aiosp_pri_idx(pri_t); static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); -static int aiost_configure(struct aiost *, struct aio_job *, vaddr_t *); +static int aiost_configure(struct aiost *, struct aio_job *, + vaddr_t *); static int aiost_process_rw(struct aiost *); static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); @@ -79,8 +82,8 @@ aio_fini(void) struct aiosp *aiosp; int error; - for (int i = 0; i < aiosp_bank_max; i++) { - aiosp = aiosp_bank[i]; + for (int i = 0; i < aiospb_max; i++) { + aiosp = aiospb[i]; if (aiosp == NULL) { continue; } @@ -93,7 +96,7 @@ aio_fini(void) kmem_free(aiosp, sizeof(*aiosp)); } - kmem_free(aiosp_bank, sizeof(*aiosp_bank) * aiosp_bank_max); + kmem_free(aiospb, sizeof(*aiospb) * aiospb_max); return 0; } @@ -104,15 +107,18 @@ aio_fini(void) static int aio_init(void) { - struct aiosp **aiosp; + struct aiosp *aiosp; int error; - aiosp_bank = kmem_zalloc(sizeof(*aiosp_bank) * aiosp_bank_max, KM_SLEEP); - aiosp = &aiosp_bank[aiosp_pri_idx(PRI_KTHREAD)]; + mutex_init(&aiospb_mtx, MUTEX_DEFAULT, IPL_NONE); + + aiospb = kmem_zalloc(sizeof(*aiospb) * aiospb_max, KM_SLEEP); + aiosp = kmem_zalloc(sizeof(*aiosp), KM_SLEEP); + aiospb[aiosp_pri_idx(PRI_KTHREAD)] = aiosp; - error = aiosp_initialize(aiosp); + error = aiosp_initialize(aiosp, PRI_KTHREAD); if (error) { - 
return error; + return error; } return 0; @@ -140,9 +146,8 @@ aiosp_modcmd(modcmd_t cmd, void *arg) * job to be completed by a servicing thread. */ int -aiosp_distribute_jobs(struct aiosp *sp) { - mutex_enter(&sp->mtx); - +aiosp_distribute_jobs(struct aiosp *sp) +{ /* * Check to see if the number of pending jobs exceeds the number of free * service threads. If it does then that means we need to create new @@ -172,6 +177,9 @@ aiosp_distribute_jobs(struct aiosp *sp) { TAILQ_FOREACH(job, &sp->jobs, list) { struct aiost *aiost = TAILQ_LAST(&sp->freelist, aiost_list); + mutex_enter(&aiost->mtx); + + mutex_enter(&sp->mtx); TAILQ_REMOVE(&sp->freelist, aiost, list); sp->nthreads_free--; @@ -180,12 +188,44 @@ aiosp_distribute_jobs(struct aiosp *sp) { int error = aiost_configure(aiost, job, &aiost->kbuf); if (error) { - mutex_exit(&sp->mtx); + mutex_exit(&aiost->mtx); return error; } + + aiost->job = job; + aiost->state = AIOST_STATE_OPERATION; + + mutex_exit(&sp->mtx); + mutex_exit(&aiost->mtx); + + cv_signal(&aiost->service_cv); } - mutex_exit(&sp->mtx); + return 0; +} + +int +aiosp_dispense_bank(void) +{ + int error; + struct aiosp *sp; + + mutex_enter(&aiospb_mtx); + + for (int i = 0; i < aiosp_pri_idx(aiospb_max); i++) { + sp = aiospb[i]; + if (sp == NULL) { + continue; + } + + error = aiosp_distribute_jobs(sp); + if (error) { + mutex_exit(&aiospb_mtx); + return error; + } + } + + mutex_exit(&aiospb_mtx); return 0; } @@ -194,13 +234,9 @@ aiosp_distribute_jobs(struct aiosp *sp) { * Initializes a servicing pool. */ static int -aiosp_initialize(struct aiosp **ret) +aiosp_initialize(struct aiosp *sp, pri_t pri) { - struct aiosp *sp; - - sp = kmem_zalloc(sizeof(*sp), KM_SLEEP); - - sp->priority = PRI_KERNEL; + sp->priority = pri; mutex_init(&sp->mtx, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&sp->freelist); TAILQ_INIT(&sp->active); @@ -210,14 +246,53 @@ aiosp_initialize(struct aiosp **ret) } /* - * Convert a priority into an index into its associative service pool. + * Convert a priority into an index into the service pool bank. + */ +static int +aiosp_pri_idx(pri_t pri) +{ + if (pri < PRI_KTHREAD) { + panic("aio_process: invalid priority for AIO ( aiospb_max) { + panic("aio_process: invalid priority for AIO (>NPRI_KTHREAD"); + } + + return idx; +} + +/* + * Convert a priority into associative service pool. Initialize the pool if it + * does not yet exist. 
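+ * Pools are therefore created lazily: the first request at a given
+ * priority allocates and initialises the corresponding slot of the
+ * bank.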
*/ -static int aiosp_pri_idx(int pri) { - if(pri < PRI_KTHREAD) { - panic("aio_process: invalid priority for AIO"); +static int +aiosp_retrieve_bank(pri_t pri, struct aiosp **aiosp) +{ + int error; + int bank_pri_idx; + + mutex_enter(&aiospb_mtx); + + bank_pri_idx = aiosp_pri_idx(pri); + + *aiosp = aiospb[bank_pri_idx]; + if (*aiosp == NULL) { + aiospb[bank_pri_idx] = kmem_zalloc(sizeof(**aiospb), + KM_SLEEP); + *aiosp = aiospb[bank_pri_idx]; + + error = aiosp_initialize(*aiosp, pri); + if (error) { + mutex_exit(&aiospb_mtx); + return error; + } } - return pri - PRI_KTHREAD; + mutex_exit(&aiospb_mtx); + + return 0; } /* @@ -274,8 +349,16 @@ aiosp_destroy(struct aiosp *sp) * Enqueue a job for processing by a servicing queue */ int -aiosp_enqueue_job(struct aiosp *sp, struct aio_job *job) +aiosp_enqueue_job(struct aio_job *job) { + int error; + struct aiosp *sp; + + error = aiosp_retrieve_bank(job->pri, &sp); + if (error) { + return error; + } + mutex_enter(&sp->mtx); TAILQ_INSERT_TAIL(&sp->jobs, job, list); @@ -298,18 +381,27 @@ aiost_create(struct aiosp *sp, struct aiost **ret) st = kmem_zalloc(sizeof(*st), KM_SLEEP); mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&st->service_mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&st->service_cv, "aioservice"); + mutex_enter(&sp->mtx); + int error = kthread_create(PRI_KERNEL, 0, NULL, aiost_entry, st, &st->lwp, "aio_%d_%d", p->p_pid, sp->nthreads_total); if (error) { + mutex_exit(&sp->mtx); return error; } + st->job = NULL; + st->state = AIOST_STATE_NONE; + TAILQ_INSERT_TAIL(&sp->freelist, st, list); sp->nthreads_free++; sp->nthreads_total++; + mutex_exit(&sp->mtx); + if (ret) { *ret = st; } @@ -328,6 +420,7 @@ aiost_entry(void *arg) struct aiost *st = arg; struct aiosp *sp = st->aiosp; struct aio_job *job; + int error; /* * We want to handle abrupt process terminations effectively. 
We use @@ -336,11 +429,28 @@ aiost_entry(void *arg) * st->service_cv */ for (;;) { - int error = cv_wait_sig(&st->service_cv, &st->mtx); - mutex_enter(&st->mtx); - if(error) goto next; - if(!st->exit) goto process; - + for (;;) { + mutex_enter(&st->mtx); + + if (st->state & AIOST_STATE_OPERATION) { + goto process_operation; + } else if (st->state & AIOST_STATE_TERMINATE) { + goto process_termination; + } else if (st->state & AIOST_STATE_NONE) { + /* + * It does not matter whether or not the + * condition was awoken by a signal as we only + * continue to an operation if the st->state is + * set accordingly + */ + mutex_exit(&st->mtx); + mutex_enter(&st->service_mtx); + cv_wait(&st->service_cv, &st->service_mtx); + } else { + panic("aio_process: invalid aiost state\n"); + } + } +process_termination: /* * Remove st from the list of active service threads, do NOT * append to the freelist, dance around locks, exit kthread @@ -352,7 +462,7 @@ aiost_entry(void *arg) mutex_exit(&sp->mtx); mutex_exit(&st->mtx); kthread_exit(0); -process: +process_operation: job = st->job; if (job->aio_op & (AIO_READ | AIO_WRITE)) { error = aiost_process_rw(st); @@ -384,6 +494,8 @@ aiost_entry(void *arg) TAILQ_INSERT_TAIL(&sp->active, st, list); sp->nthreads_active++; + st->state = AIOST_STATE_NONE; + mutex_exit(&sp->mtx); } } @@ -468,7 +580,8 @@ aiost_process_rw(struct aiost *aiost) * processes a sync/dsync asynchronous operations */ static int -aiost_process_sync(struct aiost *aiost) { +aiost_process_sync(struct aiost *aiost) +{ struct aio_job *job = aiost->job; struct aiocb *aiocbp = &job->aiocbp; struct file *fp; @@ -519,15 +632,16 @@ aiost_process_sync(struct aiost *aiost) { * aiost_entry. */ static int -aiost_terminate(struct aiost *st) { +aiost_terminate(struct aiost *st) +{ mutex_enter(&st->mtx); + st->state = AIOST_STATE_TERMINATE; + mutex_exit(&st->mtx); - st->exit = 1; cv_signal(&st->service_cv); kthread_join(st->lwp); cv_destroy(&st->service_cv); - mutex_exit(&st->mtx); mutex_destroy(&st->mtx); kmem_free(st, sizeof(*st)); @@ -539,9 +653,13 @@ aiost_terminate(struct aiost *st) { * and establish the 'shared' memory region. */ static int -aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) { +aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) +{ struct vmspace *vm = job->p->p_vmspace; struct aiocb *aiocb = &job->aiocbp; + vaddr_t uva, kva; + paddr_t upa; + int error; vm_prot_t protections = VM_PROT_NONE; if (job->aio_op == AIO_READ) { @@ -556,13 +674,13 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) { * To account for the case where the memory is anonymously mapped and * has not yet been fulfilled. 
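	 * Until it is first touched, an anonymous page may have no
	 * physical backing at all, so the buffer must be wired before it
	 * is mapped below.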
*/ - int error = uvm_vslock(vm, job->aiocb_uptr, aiocb->aio_nbytes, + error = uvm_vslock(vm, job->aiocb_uptr, aiocb->aio_nbytes, protections); if (error) { return error; } - vaddr_t kva = uvm_km_alloc(kernel_map, aiocb->aio_nbytes, 0, + kva = uvm_km_alloc(kernel_map, aiocb->aio_nbytes, 0, UVM_KMF_VAONLY); if (!kva) { uvm_vsunlock(vm, job->aiocb_uptr, aiocb->aio_nbytes); @@ -572,12 +690,13 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) { /* * Extract physical memory and map to the kernel */ - for (vaddr_t uva = trunc_page((vaddr_t)aiocb->aio_buf); + + for (uva = trunc_page((vaddr_t)aiocb->aio_buf); uva < round_page((vaddr_t)aiocb->aio_buf + aiocb->aio_nbytes); uva += PAGE_SIZE) { - paddr_t upa; - int ret = pmap_extract(vm_map_pmap(&vm->vm_map), uva, &upa); - if (!ret) { + + error = pmap_extract(vm_map_pmap(&vm->vm_map), uva, &upa); + if (error) { uvm_km_free(kernel_map, kva, aiocb->aio_nbytes, UVM_KMF_VAONLY); uvm_vsunlock(vm, job->aiocb_uptr, diff --git a/sys/sys/aio.h b/sys/sys/aio.h index baf2f801469fa..ddf6fe9b20313 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -94,12 +94,17 @@ struct aiocb { struct aio_job { int aio_op; /* Operation code */ struct aiocb aiocbp; /* AIO data structure */ + pri_t pri; /* Job priority */ void *aiocb_uptr; /* User-space pointer for identification of job */ struct proc *p; /* Process that instantiated the job */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; }; +#define AIOST_STATE_NONE 0x0 +#define AIOST_STATE_OPERATION 0x1 +#define AIOST_STATE_TERMINATE 0x2 + /* Structure for AIO servicing thread */ struct aiosp; struct aiost { @@ -107,10 +112,11 @@ struct aiost { struct aiosp *aiosp; /* Servicing pool of this thread */ kmutex_t mtx; /* Protects this structure */ kcondvar_t service_cv; /* Signal to activate thread */ + kmutex_t service_mtx; /* Signal to activate thread */ struct aio_job *job; /* Jobs associated with the thread */ struct lwp *lwp; /* Servicing thread LWP */ vaddr_t kbuf; /* Shared memory buffer */ - int exit; /* Signifies an exit routine */ + int state; /* The state of the thread */ }; /* Structure for AIO servicing pool */ @@ -142,8 +148,8 @@ struct aioproc { unsigned int jobs_count; /* Count of the jobs */ TAILQ_HEAD(, aio_job) jobs_queue;/* Queue of the AIO jobs */ struct lwp *aio_worker; /* AIO worker thread */ + struct aiost_list active_jobs; /* List of active servicing threads */ - struct aiosp *sp; /* Servicing pool of the process */ }; extern u_int aio_listio_max; @@ -151,7 +157,8 @@ extern u_int aio_listio_max; void aio_print_jobs(void (*)(const char *, ...) 
__printflike(1, 2)); int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); int aiosp_distribute_jobs(struct aiosp *); -int aiosp_enqueue_job(struct aiosp *, struct aio_job *); +int aiosp_dispense_bank(void); +int aiosp_enqueue_job(struct aio_job *); #endif /* _KERNEL */ From 79550cb5e715f6e0e7bbe17c5aabea09df7c00d2 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sat, 7 Jun 2025 18:24:38 -0600 Subject: [PATCH 14/53] signal upon completion along with bugs --- sys/kern/sys_aiosp.c | 40 ++++++++++++++++++++++++++++++++-------- sys/sys/aio.h | 6 +++--- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 01eaecc980052..932ae70a157c7 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -72,6 +72,7 @@ static int aiost_configure(struct aiost *, struct aio_job *, static int aiost_process_rw(struct aiost *); static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); +static void aiost_sigsend(struct proc *, struct sigevent *); /* * Tear down all service pools @@ -395,6 +396,7 @@ aiost_create(struct aiosp *sp, struct aiost **ret) st->job = NULL; st->state = AIOST_STATE_NONE; + st->aiosp = sp; TAILQ_INSERT_TAIL(&sp->freelist, st, list); sp->nthreads_free++; @@ -440,21 +442,20 @@ aiost_entry(void *arg) /* * It does not matter whether or not the * condition was awoken by a signal as we only - * continue to an operation if the st->state is - * set accordingly + * continue to an operation/termination if + * st->state is set accordingly */ mutex_exit(&st->mtx); mutex_enter(&st->service_mtx); cv_wait(&st->service_cv, &st->service_mtx); } else { - panic("aio_process: invalid aiost state\n"); + panic("aio_process: invalid aiost state {%x}\n", st->state); } } process_termination: /* * Remove st from the list of active service threads, do NOT * append to the freelist, dance around locks, exit kthread - * TODO SIMPLIFY AND REMOVE LABELS */ mutex_enter(&sp->mtx); TAILQ_REMOVE(&sp->freelist, st, list); @@ -479,6 +480,8 @@ aiost_entry(void *arg) } else { panic("aio_process: invalid operation code\n"); } + + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); next: /* * Remove st from list of active service threads, append to @@ -488,11 +491,11 @@ aiost_entry(void *arg) mutex_exit(&st->mtx); mutex_enter(&sp->mtx); - TAILQ_REMOVE(&sp->freelist, st, list); - sp->nthreads_free--; + TAILQ_REMOVE(&sp->active, st, list); + sp->nthreads_active--; - TAILQ_INSERT_TAIL(&sp->active, st, list); - sp->nthreads_active++; + TAILQ_INSERT_TAIL(&sp->freelist, st, list); + sp->nthreads_free++; st->state = AIOST_STATE_NONE; @@ -500,6 +503,27 @@ aiost_entry(void *arg) } } +/* + * send AIO signal. 
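+ * Deliver the signal described by the job's sigevent once the
+ * operation finishes. For reference, a request would typically arm
+ * this from userland along the following lines (illustrative only,
+ * not part of the patch):
+ *
+ *	struct aiocb cb;
+ *	memset(&cb, 0, sizeof(cb));
+ *	cb.aio_fildes = fd;
+ *	cb.aio_buf = buf;
+ *	cb.aio_nbytes = len;
+ *	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ *	cb.aio_sigevent.sigev_signo = SIGUSR1;
+ *	cb.aio_sigevent.sigev_value.sival_ptr = &cb;
+ *	aio_read(&cb);
+ *
+ * SIGEV_NONE (or a zero signal number) suppresses delivery, and the
+ * siginfo code is SI_ASYNCIO so that handlers can tell AIO
+ * completions apart from other signal sources.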
+ */ +static void +aiost_sigsend(struct proc *p, struct sigevent *sig) +{ + ksiginfo_t ksi; + + if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE) + return; + + KSI_INIT(&ksi); + ksi.ksi_signo = sig->sigev_signo; + ksi.ksi_code = SI_ASYNCIO; + ksi.ksi_value = sig->sigev_value; + + mutex_enter(&proc_lock); + kpsignal(p, &ksi, NULL); + mutex_exit(&proc_lock); +} + /* * processes a read/write asynchronous operations */ diff --git a/sys/sys/aio.h b/sys/sys/aio.h index ddf6fe9b20313..691a5bff442de 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -101,9 +101,9 @@ struct aio_job { struct lio_req *lio; }; -#define AIOST_STATE_NONE 0x0 -#define AIOST_STATE_OPERATION 0x1 -#define AIOST_STATE_TERMINATE 0x2 +#define AIOST_STATE_NONE 0x1 +#define AIOST_STATE_OPERATION 0x2 +#define AIOST_STATE_TERMINATE 0x4 /* Structure for AIO servicing thread */ struct aiosp; From ea5120c650000c1b0769d096d628d2c335d7c280 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sat, 7 Jun 2025 22:50:34 -0600 Subject: [PATCH 15/53] refine the distribution of jobs between threads --- sys/kern/sys_aiosp.c | 78 ++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 932ae70a157c7..be7dab37915de 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -149,6 +149,11 @@ aiosp_modcmd(modcmd_t cmd, void *arg) int aiosp_distribute_jobs(struct aiosp *sp) { + struct aiost **aiost_list; + struct aio_job *job; + int total_dispensed; + int error = 0; + /* * Check to see if the number of pending jobs exceeds the number of free * service threads. If it does then that means we need to create new @@ -160,7 +165,7 @@ aiosp_distribute_jobs(struct aiosp *sp) for (int i = 0; i < nthreads_new; i++) { struct aiost *aiost; - int error = aiost_create(sp, &aiost); + error = aiost_create(sp, &aiost); if (error) { mutex_exit(&sp->mtx); return error; @@ -168,43 +173,70 @@ aiosp_distribute_jobs(struct aiosp *sp) } } + if (!sp->jobs_pending) { + return 0; + } + + total_dispensed = 0; + aiost_list = kmem_zalloc(sizeof(*aiost_list) * + sp->jobs_pending, KM_SLEEP); + /* * Loop over all pending jobs and assign a thread from the freelist. * Move from freelist to active. Configure service thread to work with - * respect to the job (and importantly the buffer associated with that - * job) + * respect to the job. Also signal the CV outside of sp->mtx to avoid + * any shenanigans. 
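+ * (Concretely: a woken service thread immediately takes its own
+ * mutexes and, once its job is done, takes sp->mtx to move itself
+ * back to the freelist, so signalling after sp->mtx is dropped avoids
+ * waking a thread straight into a lock it would block on.)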
*/ - struct aio_job *job; - TAILQ_FOREACH(job, &sp->jobs, list) { + mutex_enter(&sp->mtx); + struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { struct aiost *aiost = TAILQ_LAST(&sp->freelist, aiost_list); + if (aiost == NULL) { + panic("aiosp_distribute_jobs: aiost is null"); + } mutex_enter(&aiost->mtx); - mutex_enter(&sp->mtx); TAILQ_REMOVE(&sp->freelist, aiost, list); sp->nthreads_free--; TAILQ_INSERT_TAIL(&sp->active, aiost, list); sp->nthreads_active++; - int error = aiost_configure(aiost, job, &aiost->kbuf); + error = aiost_configure(aiost, job, &aiost->kbuf); if (error) { + kmem_free(aiost_list + total_dispensed, + sizeof(*aiost_list) * sp->jobs_pending); mutex_exit(&aiost->mtx); - return error; + goto finish; } - aiost->job = job; - aiost->state = AIOST_STATE_OPERATION; + TAILQ_REMOVE(&sp->jobs, job, list); + + aiost_list[total_dispensed++] = aiost; + sp->jobs_pending--; - mutex_exit(&sp->mtx); mutex_exit(&aiost->mtx); + } +finish: + mutex_exit(&sp->mtx); + for (int i = 0; i < total_dispensed; i++) { + struct aiost *aiost = aiost_list[i]; + aiost->job = job; + aiost->state = AIOST_STATE_OPERATION; cv_signal(&aiost->service_cv); } - return 0; + kmem_free(aiost_list, sizeof(*aiost_list) * total_dispensed); + + return error; } +/* + * Distribute all pending operations on all service queues attached to the + * primary bank + */ int aiosp_dispense_bank(void) { @@ -439,17 +471,22 @@ aiost_entry(void *arg) } else if (st->state & AIOST_STATE_TERMINATE) { goto process_termination; } else if (st->state & AIOST_STATE_NONE) { - /* - * It does not matter whether or not the - * condition was awoken by a signal as we only - * continue to an operation/termination if - * st->state is set accordingly - */ mutex_exit(&st->mtx); mutex_enter(&st->service_mtx); - cv_wait(&st->service_cv, &st->service_mtx); + error = cv_wait_sig(&st->service_cv, + &st->service_mtx); + mutex_exit(&st->service_mtx); + if (error) { + /* + * Thread was interrupt. 
Check for + * pending exit or suspension + */ + mutex_exit(&st->service_mtx); + lwp_userret(curlwp); + } } else { - panic("aio_process: invalid aiost state {%x}\n", st->state); + panic("aio_process: invalid aiost state {%x}\n", + st->state); } } process_termination: @@ -714,7 +751,6 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) /* * Extract physical memory and map to the kernel */ - for (uva = trunc_page((vaddr_t)aiocb->aio_buf); uva < round_page((vaddr_t)aiocb->aio_buf + aiocb->aio_nbytes); uva += PAGE_SIZE) { From 32256fcbf018f26aed783f0e9c172c820969fe25 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sun, 8 Jun 2025 15:56:26 -0600 Subject: [PATCH 16/53] fix job assignment to st --- sys/kern/sys_aiosp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index be7dab37915de..c96ac77d8f185 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -213,6 +213,8 @@ aiosp_distribute_jobs(struct aiosp *sp) TAILQ_REMOVE(&sp->jobs, job, list); + aiost->job = job; + aiost_list[total_dispensed++] = aiost; sp->jobs_pending--; @@ -223,7 +225,6 @@ aiosp_distribute_jobs(struct aiosp *sp) for (int i = 0; i < total_dispensed; i++) { struct aiost *aiost = aiost_list[i]; - aiost->job = job; aiost->state = AIOST_STATE_OPERATION; cv_signal(&aiost->service_cv); } @@ -520,12 +521,14 @@ aiost_entry(void *arg) aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); next: + st->state = AIOST_STATE_NONE; + mutex_exit(&st->mtx); + /* * Remove st from list of active service threads, append to * freelist, dance around locks, then iterate loop and block on * st->service_cv */ - mutex_exit(&st->mtx); mutex_enter(&sp->mtx); TAILQ_REMOVE(&sp->active, st, list); @@ -534,8 +537,6 @@ aiost_entry(void *arg) TAILQ_INSERT_TAIL(&sp->freelist, st, list); sp->nthreads_free++; - st->state = AIOST_STATE_NONE; - mutex_exit(&sp->mtx); } } From 0c4df413014cf12004e0375a8eae9d805c16687c Mon Sep 17 00:00:00 2001 From: Ethan Date: Wed, 11 Jun 2025 19:32:54 -0600 Subject: [PATCH 17/53] sanctity of types and aiost_teardown --- sys/kern/sys_aiosp.c | 93 ++++++++++++++++++++++++++++++-------------- sys/sys/aio.h | 10 ++--- 2 files changed, 68 insertions(+), 35 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index c96ac77d8f185..2baad6889af77 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -69,6 +69,7 @@ static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); static int aiost_configure(struct aiost *, struct aio_job *, vaddr_t *); +static int aiost_teardown(struct aiost *); static int aiost_process_rw(struct aiost *); static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); @@ -333,50 +334,41 @@ aiosp_retrieve_bank(pri_t pri, struct aiosp **aiosp) * Each process keeps track of all the service threads instantiated to service * an asynchronous operation by the process. When a process is terminated we * must also terminate all of its active and pending asynchronous operation. - * EXCEPTIONAL TROLLING FIX LATER */ static int aiosp_destroy(struct aiosp *sp) { struct aiost *st; - int freelist = 1; - int error; + struct aiost *tmp; + int error = 0; mutex_enter(&sp->mtx); - goto begin; -process: - error = aiost_terminate(st); - if (error) { - mutex_exit(&sp->mtx); - return error; - } - - kmem_free(st, sizeof(*st)); + /* + * Terminate and destroy every service thread both free and active. 
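+ * The _SAFE variant of the iterator is required here: aiost_terminate()
+ * joins the kthread and tears the entry down, so neither 'st' nor its
+ * list linkage can be trusted after the call returns and the next
+ * pointer must have been captured up front.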
+ */ + TAILQ_FOREACH_SAFE(st, &sp->freelist, list, tmp) { + error = aiost_terminate(st); + if (error) { + goto finish; + } - if (freelist) { - goto freelist_next; - } else { - goto active_next; + kmem_free(st, sizeof(*st)); } -begin: - TAILQ_FOREACH(st, &sp->freelist, list) { - goto process; -freelist_next: - } + TAILQ_FOREACH_SAFE(st, &sp->active, list, tmp) { + error = aiost_terminate(st); + if (error) { + goto finish; + } - freelist = 0; - TAILQ_FOREACH(st, &sp->active, list) { - goto process; -active_next: + kmem_free(st, sizeof(*st)); } - +finish: kmem_free(sp, sizeof(*sp)); - mutex_exit(&sp->mtx); - return 0; + return error; } /* @@ -421,7 +413,7 @@ aiost_create(struct aiosp *sp, struct aiost **ret) mutex_enter(&sp->mtx); int error = kthread_create(PRI_KERNEL, 0, NULL, aiost_entry, - st, &st->lwp, "aio_%d_%d", p->p_pid, sp->nthreads_total); + st, &st->lwp, "aio_%d_%ld", p->p_pid, sp->nthreads_total); if (error) { mutex_exit(&sp->mtx); return error; @@ -696,7 +688,13 @@ aiost_process_sync(struct aiost *aiost) static int aiost_terminate(struct aiost *st) { + int error = 0; + mutex_enter(&st->mtx); + error = aiost_teardown(st); + if (error) { + return error; + } st->state = AIOST_STATE_TERMINATE; mutex_exit(&st->mtx); @@ -707,7 +705,7 @@ aiost_terminate(struct aiost *st) mutex_destroy(&st->mtx); kmem_free(st, sizeof(*st)); - return 0; + return error; } /* @@ -774,3 +772,38 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) return 0; } + +/* + * Free all memory and meta associated with aiost->kbuf + */ +static int +aiost_teardown(struct aiost *aiost) +{ + struct aio_job *job; + struct vmspace *vm; + struct aiocb *aiocb; + vaddr_t kva; + + job = aiost->job; + if (job == NULL) { + return 0; + } + + vm = job->p->p_vmspace; + aiocb = &job->aiocbp; + + kva = (vaddr_t)aiost->kbuf; + if (!kva) { + return 0; + } + + for (vaddr_t va = kva; va < kva + round_page(aiocb->aio_nbytes); + va += PAGE_SIZE) { + pmap_kremove(va, PAGE_SIZE); + } + + uvm_km_free(kernel_map, kva, aiocb->aio_nbytes, UVM_KMF_VAONLY); + uvm_vsunlock(vm, job->aiocb_uptr, aiocb->aio_nbytes); + + return 0; +} diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 691a5bff442de..b86b6259de7f6 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -123,14 +123,14 @@ struct aiost { TAILQ_HEAD(aiost_list, aiost); struct aiosp { struct aiost_list freelist; /* Available service threads */ - int nthreads_free; /* Length of freelist */ + size_t nthreads_free; /* Length of freelist */ struct aiost_list active; /* Active servicing threads */ - int nthreads_active; /* length of active list */ + size_t nthreads_active; /* length of active list */ TAILQ_HEAD(, aio_job) jobs; /* Queue of pending jobs */ - int jobs_pending; /* Number of pending jobs */ + size_t jobs_pending; /* Number of pending jobs */ kmutex_t mtx; /* Protects structure */ - int nthreads_total; /* Number of total servicing threads */ - int priority; /* Thread priority of the pool */ + size_t nthreads_total; /* Number of total servicing threads */ + pri_t priority; /* Thread priority of the pool */ }; /* LIO structure */ From f7931dc586a1c1f429044b162c60f70816efaa78 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Tue, 17 Jun 2025 13:29:17 -0600 Subject: [PATCH 18/53] handle termination --- sys/kern/sys_aio.c | 24 ++++++++----------- sys/kern/sys_aiosp.c | 57 ++++++++++++++++++++++++++++++++++++++++++++ sys/sys/aio.h | 5 ++-- 3 files changed, 70 insertions(+), 16 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index c67504dc21450..6498ad3aeabc6 
100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -84,7 +84,6 @@ static void * aio_ehook; static void aio_worker(void *); static void aio_process(struct aio_job *); static void aio_sendsig(struct proc *, struct sigevent *); -static int aio_validate_conflicts(struct aioproc *, void *); static int aio_enqueue_job(int, void *, struct lio_req *); static void aio_exit(proc_t *, void *); @@ -164,7 +163,6 @@ aio_init(void) static int aio_modcmd(modcmd_t cmd, void *arg) { - switch (cmd) { case MODULE_CMD_INIT: return aio_init(); @@ -194,6 +192,7 @@ aio_procinit(struct proc *p) cv_init(&aio->aio_worker_cv, "aiowork"); cv_init(&aio->done_cv, "aiodone"); TAILQ_INIT(&aio->jobs_queue); + TAILQ_INIT(&aio->aiost_total); /* * Create an AIO worker thread. @@ -478,33 +477,26 @@ aio_sendsig(struct proc *p, struct sigevent *sig) /* * The same job can be enqueued twice. So ensure that it does not exist */ +#ifndef AIOSP static int -aio_validate_conflicts(struct aioproc *aio, void *aiocb_uptr) +aio_validate_conflicts(struct aioproc *aio, void *uptr) { mutex_enter(&aio->aio_mtx); -#ifdef AIOSP - struct aiost *st; - TAILQ_FOREACH(st, &aio->active_jobs, list) { - if (st->job->aiocb_uptr != aiocb_uptr) - continue; - mutex_exit(&aio->aio_mtx); - return EINVAL; - } -#else struct aio_job *a_job; TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { - if (a_job->aiocb_uptr != aiocb_uptr) + if (a_job->aiocb_uptr != uptr) { continue; + } mutex_exit(&aio->aio_mtx); return EINVAL; } -#endif mutex_exit(&aio->aio_mtx); return 0; } +#endif /* * Enqueue the job. @@ -559,7 +551,11 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) */ aio = p->p_aio; if (aio) { +#ifdef AIOSP + error = aiosp_validate_conflicts(aio, aiocb_uptr); +#else error = aio_validate_conflicts(aio, aiocb_uptr); +#endif if (error) { return SET_ERROR(error); } diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 2baad6889af77..348fee96fdf7f 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -150,6 +150,7 @@ aiosp_modcmd(modcmd_t cmd, void *arg) int aiosp_distribute_jobs(struct aiosp *sp) { + //struct proc *p = curlwp->l_proc; struct aiost **aiost_list; struct aio_job *job; int total_dispensed; @@ -265,6 +266,8 @@ aiosp_dispense_bank(void) return 0; } +// WHEN A PROCESS DIES DESTROY ALL SERVICE THREADS + /* * Initializes a servicing pool. */ @@ -807,3 +810,57 @@ aiost_teardown(struct aiost *aiost) return 0; } + +/* + * For major workloads that actually merit the use of asynchronous IO you can + * expect an arbitrarily high number of servicing threads to spawn. Throughout + * their lifecycle these servicing threads will remain cached within the bank to + * be pulled from when needed. It makes sense to flush this cache routinely when + * a process terminates. All servicing threads spawned by a given process will + * be flushed when that process terminates. + */ +int +aiosp_flush(struct aioproc *proc) +{ + struct aiost *st; + struct aiost *tmp; + int error; + + mutex_enter(&proc->aio_mtx); + + TAILQ_FOREACH_SAFE(st, &proc->aiost_total, list, tmp) { + error = aiost_terminate(st); + if (error) { + mutex_exit(&proc->aio_mtx); + return error; + } + + kmem_free(st, sizeof(*st)); + } + + mutex_exit(&proc->aio_mtx); + + return error; +} + +/* + * The same job can not be enqueued twice. 
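+ * POSIX leaves the result of submitting an aiocb that is already in
+ * flight undefined; rejecting the duplicate with EINVAL mirrors the
+ * behaviour of the non-AIOSP aio_validate_conflicts() path.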
+ */ +int +aiosp_validate_conflicts(struct aioproc *proc, void *uptr) +{ + mutex_enter(&proc->aio_mtx); + + struct aiost *st; + TAILQ_FOREACH(st, &proc->aiost_total, list) { + if (st->job->aiocb_uptr != uptr) { + continue; + } + mutex_exit(&proc->aio_mtx); + return EINVAL; + } + + mutex_exit(&proc->aio_mtx); + + return 0; +} diff --git a/sys/sys/aio.h b/sys/sys/aio.h index b86b6259de7f6..e8e81d7ccdb9e 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -148,8 +148,7 @@ struct aioproc { unsigned int jobs_count; /* Count of the jobs */ TAILQ_HEAD(, aio_job) jobs_queue;/* Queue of the AIO jobs */ struct lwp *aio_worker; /* AIO worker thread */ - - struct aiost_list active_jobs; /* List of active servicing threads */ + struct aiost_list aiost_total; /* Total list of servicing threads */ }; extern u_int aio_listio_max; @@ -159,6 +158,8 @@ int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); int aiosp_distribute_jobs(struct aiosp *); int aiosp_dispense_bank(void); int aiosp_enqueue_job(struct aio_job *); +int aiosp_flush(struct aioproc *); +int aiosp_validate_conflicts(struct aioproc *, void *); #endif /* _KERNEL */ From 76547e6c304e3536abd4a4862e6ad4f736b0d136 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Wed, 18 Jun 2025 14:51:03 -0600 Subject: [PATCH 19/53] aio_suspend and supporting aiosp_ops --- sys/kern/sys_aiosp.c | 117 +++++++++++++++++++++++++++++++++++++++---- sys/sys/aio.h | 11 ++++ 2 files changed, 118 insertions(+), 10 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 348fee96fdf7f..f7c95a21a41d1 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -35,6 +35,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp #include #include +#include #include #include @@ -236,6 +237,91 @@ aiosp_distribute_jobs(struct aiosp *sp) return error; } +/* + * aiosp_ops represent a collection of operations whose status should be + * tracked. When the user invokes a suspend, we create a new collection, and + * then for each aiost referenced within aiocbp_list, when those operations + * are finished, every aiosp_ops appended to that thread (aiost->ops) gets + * awoken and the completion count incremented. The completion counter can be + * incremeneted posthumously as well. 
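+ *
+ * A concrete walk-through: suspending on three outstanding aiocbs
+ * creates one aiosp_ops that is linked into each of the three
+ * servicing threads' ops arrays. As a thread finishes its job it
+ * bumps 'completed' and signals done_cv; the suspender re-checks the
+ * count on every wakeup and returns once enough jobs have completed,
+ * or EAGAIN if the timeout expires first.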
+ */ +int +aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, + struct timespec *ts) +{ + int error; + int timo; + + if (ts) { + timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); + if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0) { + timo = 1; + } + + if (timo <= 0) { + return SET_ERROR(EAGAIN); + } + } else { + timo = 0; + } + + struct aiosp_ops ops = { 0 }; + cv_init(&ops.done_cv, "aiodone"); + mutex_init(&ops.mtx, MUTEX_DEFAULT, IPL_NONE); + + for (int i = 0; i < nent; i++) { + struct aiost *aiost; + TAILQ_FOREACH(aiost, &aioproc->aiost_total, list) { + if (aiost->state == AIOST_STATE_NONE || + aiost->state == AIOST_STATE_TERMINATE) { + continue; + } + + if (aiost->job->aiocb_uptr == aiocbp_list[i]) { + goto process; + } + } + + // HANDLE THIS PROPERLY + error = SET_ERROR(EINVAL); + return error; +process: + mutex_enter(&aiost->mtx); + + if (powerof2(aiost->ops_total)) { + size_t total = aiost->ops_total - 1; + for (int j = 0; j < ilog2(sizeof(total) * 8); + j++) { + total |= total >> (1 << j); + } + total += 1; + + aiost->ops = kmem_alloc(total * + sizeof(struct aiost), KM_SLEEP); + } + + aiost->ops[aiost->ops_total++] = &ops; + + mutex_exit(&aiost->mtx); + } + + mutex_enter(&ops.mtx); + for (; ops.completed != ops.total;) { + error = cv_timedwait_sig(&ops.done_cv, &ops.mtx, timo); + if (error) { + if (error == EWOULDBLOCK) { + error = SET_ERROR(EAGAIN); + } + + mutex_exit(&ops.mtx); + return error; + } + } + + mutex_exit(&ops.mtx); + return error; +} + /* * Distribute all pending operations on all service queues attached to the * primary bank @@ -694,11 +780,22 @@ aiost_terminate(struct aiost *st) int error = 0; mutex_enter(&st->mtx); + + size_t total = st->ops_total - 1; + for (int j = 0; j < ilog2(sizeof(total) * 8); j++) { + total |= total >> (1 << j); + } + total += 1; + + kmem_free(st->ops, total * sizeof(struct aiost)); + st->ops = NULL; + error = aiost_teardown(st); if (error) { return error; } st->state = AIOST_STATE_TERMINATE; + mutex_exit(&st->mtx); cv_signal(&st->service_cv); @@ -820,25 +917,25 @@ aiost_teardown(struct aiost *aiost) * be flushed when that process terminates. */ int -aiosp_flush(struct aioproc *proc) +aiosp_flush(struct aioproc *aioproc) { struct aiost *st; struct aiost *tmp; int error; - mutex_enter(&proc->aio_mtx); + mutex_enter(&aioproc->aio_mtx); - TAILQ_FOREACH_SAFE(st, &proc->aiost_total, list, tmp) { + TAILQ_FOREACH_SAFE(st, &aioproc->aiost_total, list, tmp) { error = aiost_terminate(st); if (error) { - mutex_exit(&proc->aio_mtx); + mutex_exit(&aioproc->aio_mtx); return error; } kmem_free(st, sizeof(*st)); } - mutex_exit(&proc->aio_mtx); + mutex_exit(&aioproc->aio_mtx); return error; } @@ -847,20 +944,20 @@ aiosp_flush(struct aioproc *proc) * The same job can not be enqueued twice. 
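 * The scan is linear in the number of servicing threads the process
 * owns; once the aiocbp hash is introduced in a later patch, the same
 * question could be answered in O(1) with aiocbp_lookup().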
*/ int -aiosp_validate_conflicts(struct aioproc *proc, void *uptr) +aiosp_validate_conflicts(struct aioproc *aioproc, void *uptr) { - mutex_enter(&proc->aio_mtx); + mutex_enter(&aioproc->aio_mtx); struct aiost *st; - TAILQ_FOREACH(st, &proc->aiost_total, list) { + TAILQ_FOREACH(st, &aioproc->aiost_total, list) { if (st->job->aiocb_uptr != uptr) { continue; } - mutex_exit(&proc->aio_mtx); + mutex_exit(&aioproc->aio_mtx); return EINVAL; } - mutex_exit(&proc->aio_mtx); + mutex_exit(&aioproc->aio_mtx); return 0; } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index e8e81d7ccdb9e..f01bc0173db0d 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -105,6 +105,14 @@ struct aio_job { #define AIOST_STATE_OPERATION 0x2 #define AIOST_STATE_TERMINATE 0x4 +/* Structure for tracking the status of a collection of OPS */ +struct aiosp_ops { + kmutex_t mtx; /* Protects this structure */ + kcondvar_t done_cv; /* Signals when a job is complete */ + int completed; /* Keeps track of the number of completed jobs */ + int total; /* Keeps track of the number of total jobs */ +}; + /* Structure for AIO servicing thread */ struct aiosp; struct aiost { @@ -115,6 +123,8 @@ struct aiost { kmutex_t service_mtx; /* Signal to activate thread */ struct aio_job *job; /* Jobs associated with the thread */ struct lwp *lwp; /* Servicing thread LWP */ + int ops_total; /* Total number of connected ops */ + struct aiosp_ops **ops; /* Array of ops */ vaddr_t kbuf; /* Shared memory buffer */ int state; /* The state of the thread */ }; @@ -158,6 +168,7 @@ int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); int aiosp_distribute_jobs(struct aiosp *); int aiosp_dispense_bank(void); int aiosp_enqueue_job(struct aio_job *); +int aiosp_suspend(struct aioproc *, struct aiocb **, int, struct timespec *); int aiosp_flush(struct aioproc *); int aiosp_validate_conflicts(struct aioproc *, void *); From 62565366c634bedc408fc9e2c249483f2e1859d5 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Thu, 19 Jun 2025 22:20:21 -0600 Subject: [PATCH 20/53] aiocbp hash --- sys/kern/sys_aiosp.c | 184 ++++++++++++++++++++++++++++++++++++------- sys/sys/aio.h | 26 +++++- 2 files changed, 180 insertions(+), 30 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index f7c95a21a41d1..509f29d319310 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -36,6 +36,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp #include #include #include +#include #include #include @@ -65,6 +66,7 @@ static int aiosp_initialize(struct aiosp *, pri_t); static int aiosp_destroy(struct aiosp *); static int aiosp_retrieve_bank(pri_t, struct aiosp **); static int aiosp_pri_idx(pri_t); +static size_t aiosp_ops_expected(size_t); static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); @@ -270,32 +272,29 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, mutex_init(&ops.mtx, MUTEX_DEFAULT, IPL_NONE); for (int i = 0; i < nent; i++) { - struct aiost *aiost; - TAILQ_FOREACH(aiost, &aioproc->aiost_total, list) { - if (aiost->state == AIOST_STATE_NONE || - aiost->state == AIOST_STATE_TERMINATE) { - continue; - } + if (aiocbp_list[i] == NULL) { + continue; + } - if (aiost->job->aiocb_uptr == aiocbp_list[i]) { - goto process; - } + struct aiocbp *aiocbp = NULL; + error = aiocbp_lookup(aioproc, &aiocbp, aiocbp_list[i]); + if (error) { + return error; + } + if (aiocbp == NULL) { + continue; + } + + struct aiost *aiost = aiocbp->job->aiost; + if 
(aiost->state == AIOST_STATE_TERMINATE || + aiost->state == AIOST_STATE_NONE) { + ops.completed++; } - // HANDLE THIS PROPERLY - error = SET_ERROR(EINVAL); - return error; -process: mutex_enter(&aiost->mtx); if (powerof2(aiost->ops_total)) { - size_t total = aiost->ops_total - 1; - for (int j = 0; j < ilog2(sizeof(total) * 8); - j++) { - total |= total >> (1 << j); - } - total += 1; - + size_t total = aiosp_ops_expected(aiost->ops_total); aiost->ops = kmem_alloc(total * sizeof(struct aiost), KM_SLEEP); } @@ -352,8 +351,6 @@ aiosp_dispense_bank(void) return 0; } -// WHEN A PROCESS DIES DESTROY ALL SERVICE THREADS - /* * Initializes a servicing pool. */ @@ -387,6 +384,23 @@ aiosp_pri_idx(pri_t pri) return idx; } +/* + * The size of aiost->ops scales with powers of two. The size of aiost->ops will + * only either collapse to zero upon being terminated, or continue growing, so + * scaling by a power of two is simple enough. + */ +static size_t +aiosp_ops_expected(size_t total) +{ + total -= 1; + for (int j = 0; j < ilog2(sizeof(total) * 8); j++) { + total |= total >> (1 << j); + } + total += 1; + + return total; +} + /* * Convert a priority into associative service pool. Initialize the pool if it * does not yet exist. @@ -781,12 +795,7 @@ aiost_terminate(struct aiost *st) mutex_enter(&st->mtx); - size_t total = st->ops_total - 1; - for (int j = 0; j < ilog2(sizeof(total) * 8); j++) { - total |= total >> (1 << j); - } - total += 1; - + size_t total = aiosp_ops_expected(st->ops_total); kmem_free(st->ops, total * sizeof(struct aiost)); st->ops = NULL; @@ -941,7 +950,7 @@ aiosp_flush(struct aioproc *aioproc) } /* - * The same job can not be enqueued twice. + * Ensure that the same job can not be enqueued twice. */ int aiosp_validate_conflicts(struct aioproc *aioproc, void *uptr) @@ -961,3 +970,120 @@ aiosp_validate_conflicts(struct aioproc *aioproc, void *uptr) return 0; } + +/* + * aiocbp hash function + */ +static inline u_int +aiocbp_hash(void *uptr) +{ + return hash32_buf(&uptr, sizeof(uptr), HASH32_BUF_INIT); +} + +/* + * aiocbp hash lookup + */ +int +aiocbp_lookup(struct aioproc *aioproc, struct aiocbp **aiocbpp, void *uptr) +{ + struct aiocbp *aiocbp; + u_int hash; + + hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; + + LIST_FOREACH(aiocbp, &aioproc->aio_hash[hash], list) { + if (aiocbp->uptr == uptr) { + *aiocbpp = aiocbp; + return 0; + } + } + + return ENOENT; +} + +/* + * aiocbp hash removal + */ +int +aiocbp_remove(struct aioproc *aioproc, struct aiocbp *aiocbp, void *uptr) +{ + struct aiocbp *found; + u_int hash; + + hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; + + LIST_FOREACH(found, &aioproc->aio_hash[hash], list) { + if (found->uptr == uptr) { + if (found != aiocbp) { + return EINVAL; + } + + LIST_REMOVE(aiocbp, list); + return 0; + } + } + + return ENOENT; +} + +/* + * aiocbp hash insertion + */ +int +aiocbp_insert(struct aioproc *aioproc, struct aiocbp *aiocbp, void *uptr) +{ + struct aiocbp *found; + u_int hash; + + hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; + + LIST_FOREACH(found, &aioproc->aio_hash[hash], list) { + if (found->uptr == uptr) { + found->job = aiocbp->job; + return EEXIST; + } + } + + aiocbp->uptr = uptr; + LIST_INSERT_HEAD(&aioproc->aio_hash[hash], aiocbp, list); + + return 0; +} + +/* + * aiocbp initialise + */ +int +aiocbp_init(struct aioproc *aioproc, u_int hashsize) +{ + if (!powerof2(hashsize)) { + return EINVAL; + } + + aioproc->aio_hash = kmem_zalloc(hashsize * sizeof(struct aiocbp_list), + KM_SLEEP); + + aioproc->aio_hash_mask = hashsize - 
1; + aioproc->aio_hash_size = hashsize; + + for (size_t i = 0; i < hashsize; i++) { + LIST_INIT(&aioproc->aio_hash[i]); + } + + return 0; +} + +/* + * aiocbp destroy + */ +void +aiocbp_destroy(struct aioproc *aioproc) +{ + if (aioproc->aio_hash != NULL) { + kmem_free(aioproc->aio_hash, + aioproc->aio_hash_size * sizeof(struct aiocbp_list)); + aioproc->aio_hash = NULL; + aioproc->aio_hash_mask = 0; + aioproc->aio_hash_size = 0; + } +} diff --git a/sys/sys/aio.h b/sys/sys/aio.h index f01bc0173db0d..a54eda5c953ae 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -91,12 +91,14 @@ struct aiocb { #define JOB_DONE 0x2 /* Structure of AIO job */ +struct aiost; struct aio_job { int aio_op; /* Operation code */ struct aiocb aiocbp; /* AIO data structure */ pri_t pri; /* Job priority */ void *aiocb_uptr; /* User-space pointer for identification of job */ struct proc *p; /* Process that instantiated the job */ + struct aiost *aiost; /* Service thread associated with this job */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; }; @@ -143,6 +145,12 @@ struct aiosp { pri_t priority; /* Thread priority of the pool */ }; +struct aiocbp { + LIST_ENTRY(aiocbp) list; + void *uptr; + struct aio_job *job; +}; + /* LIO structure */ struct lio_req { u_int refcnt; /* Reference counter */ @@ -150,6 +158,7 @@ struct lio_req { }; /* Structure of AIO data for process */ +LIST_HEAD(aiocbp_list, aiocbp); struct aioproc { kmutex_t aio_mtx; /* Protects the entire structure */ kcondvar_t aio_worker_cv; /* Signals on a new job */ @@ -159,12 +168,20 @@ struct aioproc { TAILQ_HEAD(, aio_job) jobs_queue;/* Queue of the AIO jobs */ struct lwp *aio_worker; /* AIO worker thread */ struct aiost_list aiost_total; /* Total list of servicing threads */ + struct aiocbp_list *aio_hash; + size_t aio_hash_size; + u_int aio_hash_mask; }; extern u_int aio_listio_max; -/* Prototypes */ + +/* + * Prototypes + */ + void aio_print_jobs(void (*)(const char *, ...) 
__printflike(1, 2)); int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); + int aiosp_distribute_jobs(struct aiosp *); int aiosp_dispense_bank(void); int aiosp_enqueue_job(struct aio_job *); @@ -172,6 +189,13 @@ int aiosp_suspend(struct aioproc *, struct aiocb **, int, struct timespec *); int aiosp_flush(struct aioproc *); int aiosp_validate_conflicts(struct aioproc *, void *); +void aiocbp_destroy(struct aioproc *); +int aiocbp_init(struct aioproc *, u_int); +int aiocbp_lookup(struct aioproc *, struct aiocbp **, void *); +int aiocbp_remove(struct aioproc *, struct aiocbp *, void *); +int aiocbp_insert(struct aioproc *, struct aiocbp *, void *); + + #endif /* _KERNEL */ #endif /* _SYS_AIO_H_ */ From 89f17ba16cd80335987f206b06f08c4fb9395226 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sat, 21 Jun 2025 14:25:18 -0600 Subject: [PATCH 21/53] posthumous job tracking and style --- sys/kern/sys_aio.c | 41 +++++++++++++++++++++--------- sys/kern/sys_aiosp.c | 59 ++++++++++++++++++++++++++------------------ sys/sys/aio.h | 8 +++--- 3 files changed, 68 insertions(+), 40 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 6498ad3aeabc6..f0d0806e93dfa 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -187,6 +187,12 @@ aio_procinit(struct proc *p) /* Allocate and initialize AIO structure */ aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP); + /* Initialize the aiocbp hash map */ + error = aiocbp_init(aio, 256); + if (error) { + return error; + } + /* Initialize queue and their synchronization structures */ mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&aio->aio_worker_cv, "aiowork"); @@ -255,6 +261,7 @@ aio_exit(struct proc *p, void *cookie) } /* Destroy and free the entire AIO data structure */ + aiocbp_destroy(aio); cv_destroy(&aio->aio_worker_cv); cv_destroy(&aio->done_cv); mutex_destroy(&aio->aio_mtx); @@ -507,7 +514,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) struct proc *p = curlwp->l_proc; struct aioproc *aio; struct aio_job *a_job; - struct aiocb aiocbp; + struct aiocb aiocb; struct sigevent *sig; int error; @@ -516,30 +523,30 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) return SET_ERROR(EAGAIN); /* Get the data structure from user-space */ - error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb)); + error = copyin(aiocb_uptr, &aiocb, sizeof(struct aiocb)); if (error) return error; /* Check if signal is set, and validate it */ - sig = &aiocbp.aio_sigevent; + sig = &aiocb.aio_sigevent; if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG || sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA) return SET_ERROR(EINVAL); /* Buffer and byte count */ if (((AIO_SYNC | AIO_DSYNC) & op) == 0) - if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX) + if (aiocb.aio_buf == NULL || aiocb.aio_nbytes > SSIZE_MAX) return SET_ERROR(EINVAL); /* Check the opcode, if LIO_NOP - simply ignore */ if (op == AIO_LIO) { KASSERT(lio != NULL); - if (aiocbp.aio_lio_opcode == LIO_WRITE) + if (aiocb.aio_lio_opcode == LIO_WRITE) op = AIO_WRITE; - else if (aiocbp.aio_lio_opcode == LIO_READ) + else if (aiocb.aio_lio_opcode == LIO_READ) op = AIO_READ; else - return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : + return (aiocb.aio_lio_opcode == LIO_NOP) ? 0 : SET_ERROR(EINVAL); } else { KASSERT(lio == NULL); @@ -575,10 +582,10 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) * Set the state with errno, and copy data * structure back to the user-space. 
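 * Publishing _errno = EINPROGRESS before the job is queued guarantees
 * that a concurrent aio_error() on this aiocb observes "in progress",
 * as POSIX requires, rather than whatever the user left in the
 * structure.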
*/ - aiocbp._state = JOB_WIP; - aiocbp._errno = SET_ERROR(EINPROGRESS); - aiocbp._retval = -1; - error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb)); + aiocb._state = JOB_WIP; + aiocb._errno = SET_ERROR(EINPROGRESS); + aiocb._retval = -1; + error = copyout(&aiocb, aiocb_uptr, sizeof(struct aiocb)); if (error) return error; @@ -590,7 +597,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) * Store the user-space pointer for searching. Since we * are storing only per proc pointers - it is safe. */ - memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb)); + memcpy(&a_job->aiocbp, &aiocb, sizeof(struct aiocb)); a_job->aiocb_uptr = aiocb_uptr; a_job->aio_op |= op; a_job->lio = lio; @@ -614,6 +621,16 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) a_job->pri = PRI_KTHREAD; a_job->p = curlwp->l_proc; + struct aiocbp *aiocbp = kmem_zalloc(sizeof(struct aiocbp), KM_SLEEP); + aiocbp->job = a_job; + aiocbp->uptr = aiocb_uptr; + + error = aiocbp_insert(aio, aiocbp); + if (error) { + mutex_exit(&aio->aio_mtx); + return SET_ERROR(error); + } + error = aiosp_enqueue_job(a_job); if (error) { mutex_exit(&aio->aio_mtx); diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 509f29d319310..287d6aa15c391 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -296,7 +296,7 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, if (powerof2(aiost->ops_total)) { size_t total = aiosp_ops_expected(aiost->ops_total); aiost->ops = kmem_alloc(total * - sizeof(struct aiost), KM_SLEEP); + sizeof(*aiost->ops), KM_SLEEP); } aiost->ops[aiost->ops_total++] = &ops; @@ -796,7 +796,7 @@ aiost_terminate(struct aiost *st) mutex_enter(&st->mtx); size_t total = aiosp_ops_expected(st->ops_total); - kmem_free(st->ops, total * sizeof(struct aiost)); + kmem_free(st->ops, total * sizeof(*st->ops)); st->ops = NULL; error = aiost_teardown(st); @@ -991,7 +991,7 @@ aiocbp_lookup(struct aioproc *aioproc, struct aiocbp **aiocbpp, void *uptr) hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; - LIST_FOREACH(aiocbp, &aioproc->aio_hash[hash], list) { + TAILQ_FOREACH(aiocbp, &aioproc->aio_hash[hash], list) { if (aiocbp->uptr == uptr) { *aiocbpp = aiocbp; return 0; @@ -1005,20 +1005,17 @@ aiocbp_lookup(struct aioproc *aioproc, struct aiocbp **aiocbpp, void *uptr) * aiocbp hash removal */ int -aiocbp_remove(struct aioproc *aioproc, struct aiocbp *aiocbp, void *uptr) +aiocbp_remove(struct aioproc *aioproc, void *uptr) { - struct aiocbp *found; + struct aiocbp *aiocbp; u_int hash; hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; - LIST_FOREACH(found, &aioproc->aio_hash[hash], list) { - if (found->uptr == uptr) { - if (found != aiocbp) { - return EINVAL; - } - - LIST_REMOVE(aiocbp, list); + struct aiocbp *tmp; + TAILQ_FOREACH_SAFE(aiocbp, &aioproc->aio_hash[hash], list, tmp) { + if (aiocbp->uptr == uptr) { + TAILQ_REMOVE(&aioproc->aio_hash[hash], aiocbp, list); return 0; } } @@ -1030,22 +1027,23 @@ aiocbp_remove(struct aioproc *aioproc, struct aiocbp *aiocbp, void *uptr) * aiocbp hash insertion */ int -aiocbp_insert(struct aioproc *aioproc, struct aiocbp *aiocbp, void *uptr) +aiocbp_insert(struct aioproc *aioproc, struct aiocbp *aiocbp) { struct aiocbp *found; + void *uptr; u_int hash; + uptr = aiocbp->uptr; hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; - LIST_FOREACH(found, &aioproc->aio_hash[hash], list) { + TAILQ_FOREACH(found, &aioproc->aio_hash[hash], list) { if (found->uptr == uptr) { found->job = aiocbp->job; return EEXIST; } } - aiocbp->uptr = uptr; - 
LIST_INSERT_HEAD(&aioproc->aio_hash[hash], aiocbp, list); + TAILQ_INSERT_HEAD(&aioproc->aio_hash[hash], aiocbp, list); return 0; } @@ -1060,14 +1058,14 @@ aiocbp_init(struct aioproc *aioproc, u_int hashsize) return EINVAL; } - aioproc->aio_hash = kmem_zalloc(hashsize * sizeof(struct aiocbp_list), + aioproc->aio_hash = kmem_zalloc(hashsize * sizeof(aioproc->aio_hash), KM_SLEEP); aioproc->aio_hash_mask = hashsize - 1; aioproc->aio_hash_size = hashsize; for (size_t i = 0; i < hashsize; i++) { - LIST_INIT(&aioproc->aio_hash[i]); + TAILQ_INIT(&aioproc->aio_hash[i]); } return 0; @@ -1079,11 +1077,24 @@ aiocbp_init(struct aioproc *aioproc, u_int hashsize) void aiocbp_destroy(struct aioproc *aioproc) { - if (aioproc->aio_hash != NULL) { - kmem_free(aioproc->aio_hash, - aioproc->aio_hash_size * sizeof(struct aiocbp_list)); - aioproc->aio_hash = NULL; - aioproc->aio_hash_mask = 0; - aioproc->aio_hash_size = 0; + if (aioproc->aio_hash == NULL) { + return; } + + struct aiocbp *aiocbp; + + for (size_t i = 0; i < aioproc->aio_hash_size; i++) { + struct aiocbp *tmp; + TAILQ_FOREACH_SAFE(aiocbp, &aioproc->aio_hash[i], list, tmp) { + TAILQ_REMOVE(&aioproc->aio_hash[i], aiocbp, list); + kmem_free(aiocbp, sizeof(*aiocbp)); + } + } + + + kmem_free(aioproc->aio_hash, + aioproc->aio_hash_size * sizeof(aioproc->aio_hash)); + aioproc->aio_hash = NULL; + aioproc->aio_hash_mask = 0; + aioproc->aio_hash_size = 0; } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index a54eda5c953ae..31af48117c861 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -146,7 +146,7 @@ struct aiosp { }; struct aiocbp { - LIST_ENTRY(aiocbp) list; + TAILQ_ENTRY(aiocbp) list; void *uptr; struct aio_job *job; }; @@ -158,7 +158,7 @@ struct lio_req { }; /* Structure of AIO data for process */ -LIST_HEAD(aiocbp_list, aiocbp); +TAILQ_HEAD(aiocbp_list, aiocbp); struct aioproc { kmutex_t aio_mtx; /* Protects the entire structure */ kcondvar_t aio_worker_cv; /* Signals on a new job */ @@ -192,8 +192,8 @@ int aiosp_validate_conflicts(struct aioproc *, void *); void aiocbp_destroy(struct aioproc *); int aiocbp_init(struct aioproc *, u_int); int aiocbp_lookup(struct aioproc *, struct aiocbp **, void *); -int aiocbp_remove(struct aioproc *, struct aiocbp *, void *); -int aiocbp_insert(struct aioproc *, struct aiocbp *, void *); +int aiocbp_remove(struct aioproc *, void *); +int aiocbp_insert(struct aioproc *, struct aiocbp *); #endif /* _KERNEL */ From 96bc33f96736b3293e2e9a259a73c1c27141722b Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sat, 21 Jun 2025 15:03:46 -0600 Subject: [PATCH 22/53] style and comments --- sys/kern/sys_aiosp.c | 6 +++++- sys/sys/aio.h | 12 ++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 287d6aa15c391..609170cd0b1d9 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -315,9 +315,13 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, mutex_exit(&ops.mtx); return error; } - } + if (ops.completed) { + break; + } + } mutex_exit(&ops.mtx); + return error; } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 31af48117c861..490fefff31371 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -111,8 +111,8 @@ struct aio_job { struct aiosp_ops { kmutex_t mtx; /* Protects this structure */ kcondvar_t done_cv; /* Signals when a job is complete */ - int completed; /* Keeps track of the number of completed jobs */ - int total; /* Keeps track of the number of total jobs */ + size_t completed; /* Keeps track of the number of completed jobs 
*/ + size_t total; /* Keeps track of the number of total jobs */ }; /* Structure for AIO servicing thread */ @@ -125,7 +125,7 @@ struct aiost { kmutex_t service_mtx; /* Signal to activate thread */ struct aio_job *job; /* Jobs associated with the thread */ struct lwp *lwp; /* Servicing thread LWP */ - int ops_total; /* Total number of connected ops */ + size_t ops_total; /* Total number of connected ops */ struct aiosp_ops **ops; /* Array of ops */ vaddr_t kbuf; /* Shared memory buffer */ int state; /* The state of the thread */ @@ -168,9 +168,9 @@ struct aioproc { TAILQ_HEAD(, aio_job) jobs_queue;/* Queue of the AIO jobs */ struct lwp *aio_worker; /* AIO worker thread */ struct aiost_list aiost_total; /* Total list of servicing threads */ - struct aiocbp_list *aio_hash; - size_t aio_hash_size; - u_int aio_hash_mask; + struct aiocbp_list *aio_hash; /* Aiocbp hash root */ + size_t aio_hash_size; /* Total number of buckets */ + u_int aio_hash_mask; /* Hash mask */ }; extern u_int aio_listio_max; From c868d4021a3ce04fdd4dbedd7bf7db3f91514e5a Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Mon, 23 Jun 2025 18:49:19 -0600 Subject: [PATCH 23/53] aiosp init fini and style --- sys/kern/sys_aiosp.c | 90 ++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 33 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 609170cd0b1d9..1f2a83512c507 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -66,7 +66,10 @@ static int aiosp_initialize(struct aiosp *, pri_t); static int aiosp_destroy(struct aiosp *); static int aiosp_retrieve_bank(pri_t, struct aiosp **); static int aiosp_pri_idx(pri_t); + static size_t aiosp_ops_expected(size_t); +static void aiosp_ops_init(struct aiosp_ops *); +static void aiosp_ops_fini(struct aiosp_ops *); static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); @@ -213,7 +216,7 @@ aiosp_distribute_jobs(struct aiosp *sp) kmem_free(aiost_list + total_dispensed, sizeof(*aiost_list) * sp->jobs_pending); mutex_exit(&aiost->mtx); - goto finish; + break; } TAILQ_REMOVE(&sp->jobs, job, list); @@ -225,7 +228,7 @@ aiosp_distribute_jobs(struct aiosp *sp) mutex_exit(&aiost->mtx); } -finish: + mutex_exit(&sp->mtx); for (int i = 0; i < total_dispensed; i++) { @@ -267,9 +270,8 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, timo = 0; } - struct aiosp_ops ops = { 0 }; - cv_init(&ops.done_cv, "aiodone"); - mutex_init(&ops.mtx, MUTEX_DEFAULT, IPL_NONE); + struct aiosp_ops ops; + aiosp_ops_init(&ops); for (int i = 0; i < nent; i++) { if (aiocbp_list[i] == NULL) { @@ -313,6 +315,8 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, } mutex_exit(&ops.mtx); + aiosp_ops_fini(&ops); + return error; } @@ -321,6 +325,7 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, } } mutex_exit(&ops.mtx); + aiosp_ops_fini(&ops); return error; } @@ -457,7 +462,9 @@ aiosp_destroy(struct aiosp *sp) TAILQ_FOREACH_SAFE(st, &sp->freelist, list, tmp) { error = aiost_terminate(st); if (error) { - goto finish; + kmem_free(sp, sizeof(*sp)); + mutex_exit(&sp->mtx); + return error; } kmem_free(st, sizeof(*st)); @@ -466,16 +473,18 @@ aiosp_destroy(struct aiosp *sp) TAILQ_FOREACH_SAFE(st, &sp->active, list, tmp) { error = aiost_terminate(st); if (error) { - goto finish; + kmem_free(sp, sizeof(*sp)); + mutex_exit(&sp->mtx); + return error; } kmem_free(st, sizeof(*st)); } -finish: + kmem_free(sp, sizeof(*sp)); mutex_exit(&sp->mtx); - return 
error; + return 0; } /* @@ -567,9 +576,19 @@ aiost_entry(void *arg) mutex_enter(&st->mtx); if (st->state & AIOST_STATE_OPERATION) { - goto process_operation; + break; } else if (st->state & AIOST_STATE_TERMINATE) { - goto process_termination; + /* + * Remove st from the list of active service + * threads, do NOT append to the freelist, dance + * around locks, exit kthread + */ + mutex_enter(&sp->mtx); + TAILQ_REMOVE(&sp->freelist, st, list); + sp->nthreads_free--; + mutex_exit(&sp->mtx); + mutex_exit(&st->mtx); + kthread_exit(0); } else if (st->state & AIOST_STATE_NONE) { mutex_exit(&st->mtx); mutex_enter(&st->service_mtx); @@ -589,37 +608,20 @@ aiost_entry(void *arg) st->state); } } -process_termination: - /* - * Remove st from the list of active service threads, do NOT - * append to the freelist, dance around locks, exit kthread - */ - mutex_enter(&sp->mtx); - TAILQ_REMOVE(&sp->freelist, st, list); - sp->nthreads_free--; - mutex_exit(&sp->mtx); - mutex_exit(&st->mtx); - kthread_exit(0); -process_operation: + job = st->job; if (job->aio_op & (AIO_READ | AIO_WRITE)) { error = aiost_process_rw(st); - if (error) { - mutex_exit(&st->mtx); - goto next; - } } else if (job->aio_op & AIO_SYNC) { error = aiost_process_sync(st); - if (error) { - mutex_exit(&st->mtx); - goto next; - } } else { panic("aio_process: invalid operation code\n"); } - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); -next: + if (!error) { + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + } + st->state = AIOST_STATE_NONE; mutex_exit(&st->mtx); @@ -953,6 +955,28 @@ aiosp_flush(struct aioproc *aioproc) return error; } +/* + * initialises aiosp_ops + */ +static void +aiosp_ops_init(struct aiosp_ops *ops) +{ + ops->completed = 0; + ops->total = 0; + cv_init(&ops->done_cv, "aiodone"); + mutex_init(&ops->mtx, MUTEX_DEFAULT, IPL_NONE); +} + +/* + * cleans up aiosp_ops + */ +static void +aiosp_ops_fini(struct aiosp_ops *ops) +{ + cv_destroy(&ops->done_cv); + mutex_destroy(&ops->mtx); +} + /* * Ensure that the same job can not be enqueued twice. */ From dd7e0f6f368e5192798a1fcd9d600a7cb0c7f7ff Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Mon, 23 Jun 2025 21:16:59 -0600 Subject: [PATCH 24/53] robust suspend options and bugs --- sys/kern/sys_aio.c | 7 +++++++ sys/kern/sys_aiosp.c | 23 +++++++++++++++++------ sys/sys/aio.h | 10 +++++++++- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index f0d0806e93dfa..d89c6f10b2031 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -890,7 +890,14 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, error = copyin(SCARG(uap, list), list, nent * sizeof(*list)); if (error) goto out; +#ifdef AIOSP + struct proc *p = l->l_proc; + struct aioproc *aio = p->p_aio; + error = aiosp_suspend(aio, list, nent, SCARG(uap, timeout) ? + &ts : NULL, AIOSP_SUSPEND_ANY); +#else error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? 
&ts : NULL); +#endif out: kmem_free(list, nent * sizeof(*list)); return error; diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 1f2a83512c507..59375a8c0a2df 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -252,10 +252,11 @@ aiosp_distribute_jobs(struct aiosp *sp) */ int aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, - struct timespec *ts) + struct timespec *ts, uint32_t flags) { int error; int timo; + int target = 0; if (ts) { timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); @@ -270,6 +271,14 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, timo = 0; } + if (flags & AIOSP_SUSPEND_ANY) { + target = 1; + } else if (flags & AIOSP_SUSPEND_ALL) { + target = nent; + } else if (flags & AIOSP_SUSPEND_N) { + target = AIOSP_SUSPEND_NEXTRACT(flags); + } + struct aiosp_ops ops; aiosp_ops_init(&ops); @@ -307,7 +316,7 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, } mutex_enter(&ops.mtx); - for (; ops.completed != ops.total;) { + for (; ops.completed < target;) { error = cv_timedwait_sig(&ops.done_cv, &ops.mtx, timo); if (error) { if (error == EWOULDBLOCK) { @@ -319,10 +328,6 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, return error; } - - if (ops.completed) { - break; - } } mutex_exit(&ops.mtx); aiosp_ops_fini(&ops); @@ -401,6 +406,10 @@ aiosp_pri_idx(pri_t pri) static size_t aiosp_ops_expected(size_t total) { + if (total <= 1) { + return 1; + } + total -= 1; for (int j = 0; j < ilog2(sizeof(total) * 8); j++) { total |= total >> (1 << j); @@ -882,6 +891,8 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) upa, protections, 0); } + job->aiost = aiost; + pmap_update(pmap_kernel()); *kbuf = kva; diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 490fefff31371..485de35bae4ef 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -107,6 +107,13 @@ struct aio_job { #define AIOST_STATE_OPERATION 0x2 #define AIOST_STATE_TERMINATE 0x4 +#define AIOSP_SUSPEND_ANY 0x1 +#define AIOSP_SUSPEND_ALL 0x2 +#define AIOSP_SUSPEND_N 0x4 + +#define AIOSP_SUSPEND_NMASK(N) ((N) & 0xffff) << 16) +#define AIOSP_SUSPEND_NEXTRACT(FLAGS) (((FLAGS) >> 16) & 0xffff) + /* Structure for tracking the status of a collection of OPS */ struct aiosp_ops { kmutex_t mtx; /* Protects this structure */ @@ -185,7 +192,8 @@ int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); int aiosp_distribute_jobs(struct aiosp *); int aiosp_dispense_bank(void); int aiosp_enqueue_job(struct aio_job *); -int aiosp_suspend(struct aioproc *, struct aiocb **, int, struct timespec *); +int aiosp_suspend(struct aioproc *, struct aiocb **, int, struct timespec *, + uint32_t); int aiosp_flush(struct aioproc *); int aiosp_validate_conflicts(struct aioproc *, void *); From 9f9c16f5b28b768c1fa0324b1dca65b46930c582 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Thu, 26 Jun 2025 02:33:21 -0600 Subject: [PATCH 25/53] major refactoring addressing critical race conditions --- sys/kern/sys_aio.c | 11 ++- sys/kern/sys_aiosp.c | 210 +++++++++++++++++++++++++++++++------------ sys/sys/aio.h | 5 +- 3 files changed, 164 insertions(+), 62 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index d89c6f10b2031..1d0c22813cd2b 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -601,6 +601,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) a_job->aiocb_uptr = aiocb_uptr; a_job->aio_op |= op; a_job->lio = lio; + a_job->aiost = NULL; /* * Add 
the job to the queue, update the counters, and @@ -625,21 +626,20 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) aiocbp->job = a_job; aiocbp->uptr = aiocb_uptr; + mutex_exit(&aio->aio_mtx); + error = aiocbp_insert(aio, aiocbp); if (error) { - mutex_exit(&aio->aio_mtx); return SET_ERROR(error); } error = aiosp_enqueue_job(a_job); if (error) { - mutex_exit(&aio->aio_mtx); return SET_ERROR(error); } error = aiosp_dispense_bank(); if (error) { - mutex_exit(&aio->aio_mtx); return SET_ERROR(error); } #else @@ -648,9 +648,8 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) if (lio) lio->refcnt++; cv_signal(&aio->aio_worker_cv); -#endif mutex_exit(&aio->aio_mtx); - +#endif /* * One would handle the errors only with aio_error() function. * This way is appropriate according to POSIX. @@ -894,7 +893,7 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; error = aiosp_suspend(aio, list, nent, SCARG(uap, timeout) ? - &ts : NULL, AIOSP_SUSPEND_ANY); + &ts : NULL, AIOSP_SUSPEND_ALL); #else error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL); #endif diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 59375a8c0a2df..e9f6f6d9a6b01 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -205,12 +205,6 @@ aiosp_distribute_jobs(struct aiosp *sp) mutex_enter(&aiost->mtx); - TAILQ_REMOVE(&sp->freelist, aiost, list); - sp->nthreads_free--; - - TAILQ_INSERT_TAIL(&sp->active, aiost, list); - sp->nthreads_active++; - error = aiost_configure(aiost, job, &aiost->kbuf); if (error) { kmem_free(aiost_list + total_dispensed, @@ -219,9 +213,17 @@ aiosp_distribute_jobs(struct aiosp *sp) break; } + TAILQ_REMOVE(&sp->freelist, aiost, list); + sp->nthreads_free--; + + TAILQ_INSERT_TAIL(&sp->active, aiost, list); + sp->nthreads_active++; + TAILQ_REMOVE(&sp->jobs, job, list); aiost->job = job; + //printf("assigning job {%lx} to aiost {%lx}\n", (uintptr_t)job, (uintptr_t)aiost); + job->aiost = aiost; aiost_list[total_dispensed++] = aiost; sp->jobs_pending--; @@ -233,11 +235,15 @@ aiosp_distribute_jobs(struct aiosp *sp) for (int i = 0; i < total_dispensed; i++) { struct aiost *aiost = aiost_list[i]; + mutex_enter(&aiost->mtx); aiost->state = AIOST_STATE_OPERATION; cv_signal(&aiost->service_cv); + mutex_exit(&aiost->mtx); } - kmem_free(aiost_list, sizeof(*aiost_list) * total_dispensed); + if (total_dispensed) { + kmem_free(aiost_list, sizeof(*aiost_list) * total_dispensed); + } return error; } @@ -254,9 +260,10 @@ int aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, struct timespec *ts, uint32_t flags) { + struct aio_job *job; int error; int timo; - int target = 0; + size_t target = 0; if (ts) { timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); @@ -279,9 +286,18 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, target = AIOSP_SUSPEND_NEXTRACT(flags); } - struct aiosp_ops ops; - aiosp_ops_init(&ops); + struct aiosp_ops *ops = kmem_zalloc(sizeof(*ops), KM_SLEEP); + aiosp_ops_init(ops); + /* + * We want a hash table that tracks jobs, using uptr as a key. We use + * this to track job completion status. How do we handle the case where + * a job is completed with one aiost, then completed, then another job + * enqueued and assigned to that exact aiost. This makes it such that + * both aiosts are assigned to both threads. 
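+ * (Restated: a servicing thread that finishes a job returns to the
+ * freelist and may immediately be handed a different job, so the
+ * thread's state says nothing about any particular aiocb. Completion
+ * must therefore be recorded on the job itself, via job->completed
+ * below, looked up through the uptr hash, never inferred from the
+ * servicing thread.)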
+ */ + + mutex_enter(&ops->mtx); for (int i = 0; i < nent; i++) { if (aiocbp_list[i] == NULL) { continue; @@ -290,47 +306,78 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, struct aiocbp *aiocbp = NULL; error = aiocbp_lookup(aioproc, &aiocbp, aiocbp_list[i]); if (error) { + mutex_exit(&ops->mtx); + aiosp_ops_fini(ops); + kmem_free(ops, sizeof(*ops)); return error; } if (aiocbp == NULL) { continue; } + job = aiocbp->job; + struct aiost *aiost = aiocbp->job->aiost; - if (aiost->state == AIOST_STATE_TERMINATE || - aiost->state == AIOST_STATE_NONE) { - ops.completed++; + if (aiost == NULL) { + if (job->completed) { + ops->completed++; + } + continue; } - mutex_enter(&aiost->mtx); + mutex_enter(&aiost->ops_mtx); - if (powerof2(aiost->ops_total)) { - size_t total = aiosp_ops_expected(aiost->ops_total); - aiost->ops = kmem_alloc(total * - sizeof(*aiost->ops), KM_SLEEP); + if (job->completed) { + mutex_exit(&aiost->ops_mtx); + ops->completed++; + continue; } - aiost->ops[aiost->ops_total++] = &ops; + if (powerof2(aiost->ops_total + 1)) { + size_t old_size = aiost->ops_total ? + aiosp_ops_expected(aiost->ops_total) : 0; + size_t new_size = aiosp_ops_expected(aiost->ops_total + 1); - mutex_exit(&aiost->mtx); + struct aiosp_ops **new_ops = kmem_zalloc(new_size * + sizeof(*new_ops), KM_SLEEP); + + if (aiost->ops && old_size > 0) { + memcpy(new_ops, aiost->ops, + aiost->ops_total * sizeof(*aiost->ops)); + kmem_free(aiost->ops, old_size * sizeof(*aiost->ops)); + } + + aiost->ops = new_ops; + } + + aiost->ops[aiost->ops_total] = ops; + aiost->ops_total += 1; + ops->total++; + + mutex_exit(&aiost->ops_mtx); } - mutex_enter(&ops.mtx); - for (; ops.completed < target;) { - error = cv_timedwait_sig(&ops.done_cv, &ops.mtx, timo); + for (; ops->completed < target;) { + mutex_exit(&ops->mtx); + mutex_enter(&ops->done_mtx); + //printf("waiting on ops %ld %ld\n", ops->completed, target); + error = cv_timedwait_sig(&ops->done_cv, &ops->done_mtx, timo); + mutex_exit(&ops->done_mtx); if (error) { if (error == EWOULDBLOCK) { error = SET_ERROR(EAGAIN); } - mutex_exit(&ops.mtx); - aiosp_ops_fini(&ops); - + aiosp_ops_fini(ops); + kmem_free(ops, sizeof(*ops)); return error; } + mutex_enter(&ops->mtx); } - mutex_exit(&ops.mtx); - aiosp_ops_fini(&ops); + + mutex_exit(&ops->mtx); + aiosp_ops_fini(ops); + kmem_free(ops, sizeof(*ops)); return error; } @@ -533,6 +580,7 @@ aiost_create(struct aiosp *sp, struct aiost **ret) mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); mutex_init(&st->service_mtx, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&st->ops_mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&st->service_cv, "aioservice"); mutex_enter(&sp->mtx); @@ -609,7 +657,6 @@ aiost_entry(void *arg) * Thread was interrupt. 
Check for * pending exit or suspension */ - mutex_exit(&st->service_mtx); lwp_userret(curlwp); } } else { @@ -631,7 +678,38 @@ aiost_entry(void *arg) aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); } + job->completed = true; + + mutex_enter(&st->ops_mtx); + //printf("I am completing an op with %ld on aiost {%lx}\n", st->ops_total, (uintptr_t)st); + for (int i = 0; i < st->ops_total; i++) { + struct aiosp_ops *ops = st->ops[i]; + if (ops == NULL) { + continue; + } + + mutex_enter(&ops->mtx); + KASSERT(ops->total > ops->completed); + ops->completed++; + mutex_exit(&ops->mtx); + cv_signal(&ops->done_cv); + } + + if (st->ops && st->ops_total) { + size_t total = aiosp_ops_expected(st->ops_total); + kmem_free(st->ops, total * sizeof(*st->ops)); + st->ops_total = 0; + st->ops = NULL; + } + mutex_exit(&st->ops_mtx); + + error = aiost_teardown(st); + if (error) { + panic("aiost_entry: aiost_teardown failure"); + } + st->state = AIOST_STATE_NONE; + st->job = NULL; mutex_exit(&st->mtx); /* @@ -644,6 +722,9 @@ aiost_entry(void *arg) TAILQ_REMOVE(&sp->active, st, list); sp->nthreads_active--; + //printf("are we appending? {%lx}!\n", (uintptr_t)st); + // CLEAR ITSELF OUT AND/OR NULLIFY JOB->AIOST + TAILQ_INSERT_TAIL(&sp->freelist, st, list); sp->nthreads_free++; @@ -686,18 +767,18 @@ aiost_process_rw(struct aiost *aiost) struct iovec aiov; struct uio auio; - + if (aiocbp->aio_nbytes > SSIZE_MAX) { error = SET_ERROR(EINVAL); goto done; } - + fp = fd_getfile(fd); if (fp == NULL) { error = SET_ERROR(EBADF); goto done; } - + aiov.iov_base = (void *)(uintptr_t)aiost->kbuf; aiov.iov_len = aiocbp->aio_nbytes; auio.uio_iov = &aiov; @@ -710,7 +791,7 @@ aiost_process_rw(struct aiost *aiost) * Perform a Read operation */ KASSERT((job->aio_op & AIO_WRITE) == 0); - + if ((fp->f_flag & FREAD) == 0) { fd_putfile(fd); error = SET_ERROR(EBADF); @@ -724,7 +805,7 @@ aiost_process_rw(struct aiost *aiost) * Perform a Write operation */ KASSERT(job->aio_op & AIO_WRITE); - + if ((fp->f_flag & FWRITE) == 0) { fd_putfile(fd); error = SET_ERROR(EBADF); @@ -735,7 +816,7 @@ aiost_process_rw(struct aiost *aiost) &auio, fp->f_cred, FOF_UPDATE_OFFSET); } fd_putfile(fd); - + /* * Store the result value */ @@ -764,17 +845,17 @@ aiost_process_sync(struct aiost *aiost) * Perform a file sync operation */ struct vnode *vp; - + if ((error = fd_getvnode(fd, &fp)) != 0) { goto done; } - + if ((fp->f_flag & FWRITE) == 0) { fd_putfile(fd); error = SET_ERROR(EBADF); goto done; } - + vp = fp->f_vnode; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (job->aio_op & AIO_DSYNC) { @@ -786,7 +867,7 @@ aiost_process_sync(struct aiost *aiost) } VOP_UNLOCK(vp); fd_putfile(fd); - + /* * Store the result value */ @@ -812,7 +893,6 @@ aiost_terminate(struct aiost *st) size_t total = aiosp_ops_expected(st->ops_total); kmem_free(st->ops, total * sizeof(*st->ops)); - st->ops = NULL; error = aiost_teardown(st); if (error) { @@ -827,6 +907,8 @@ aiost_terminate(struct aiost *st) cv_destroy(&st->service_cv); mutex_destroy(&st->mtx); + mutex_destroy(&st->ops_mtx); + mutex_destroy(&st->service_mtx); kmem_free(st, sizeof(*st)); return error; @@ -844,6 +926,7 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) vaddr_t uva, kva; paddr_t upa; int error; + bool success; vm_prot_t protections = VM_PROT_NONE; if (job->aio_op == AIO_READ) { @@ -858,7 +941,7 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) * To account for the case where the memory is anonymously mapped and * has not yet been fulfilled. 
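 *
 * uvm_vslock() faults the range in and wires it, so the pmap_extract()
 * loop below can rely on a resident physical page for every user VA it
 * visits; the matching uvm_vsunlock() calls release the wiring on the
 * error paths and in aiost_teardown().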
*/ - error = uvm_vslock(vm, job->aiocb_uptr, aiocb->aio_nbytes, + error = uvm_vslock(vm, aiocb->aio_buf, aiocb->aio_nbytes, protections); if (error) { return error; @@ -867,7 +950,7 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) kva = uvm_km_alloc(kernel_map, aiocb->aio_nbytes, 0, UVM_KMF_VAONLY); if (!kva) { - uvm_vsunlock(vm, job->aiocb_uptr, aiocb->aio_nbytes); + uvm_vsunlock(vm, aiocb->aio_buf, aiocb->aio_nbytes); return ENOMEM; } @@ -878,11 +961,11 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) uva < round_page((vaddr_t)aiocb->aio_buf + aiocb->aio_nbytes); uva += PAGE_SIZE) { - error = pmap_extract(vm_map_pmap(&vm->vm_map), uva, &upa); - if (error) { + success = pmap_extract(vm_map_pmap(&vm->vm_map), uva, &upa); + if (!success) { uvm_km_free(kernel_map, kva, aiocb->aio_nbytes, UVM_KMF_VAONLY); - uvm_vsunlock(vm, job->aiocb_uptr, + uvm_vsunlock(vm, aiocb->aio_buf, aiocb->aio_nbytes); return EFAULT; } @@ -929,7 +1012,7 @@ aiost_teardown(struct aiost *aiost) } uvm_km_free(kernel_map, kva, aiocb->aio_nbytes, UVM_KMF_VAONLY); - uvm_vsunlock(vm, job->aiocb_uptr, aiocb->aio_nbytes); + uvm_vsunlock(vm, aiocb->aio_buf, aiocb->aio_nbytes); return 0; } @@ -976,6 +1059,7 @@ aiosp_ops_init(struct aiosp_ops *ops) ops->total = 0; cv_init(&ops->done_cv, "aiodone"); mutex_init(&ops->mtx, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&ops->done_mtx, MUTEX_DEFAULT, IPL_NONE); } /* @@ -986,6 +1070,7 @@ aiosp_ops_fini(struct aiosp_ops *ops) { cv_destroy(&ops->done_cv); mutex_destroy(&ops->mtx); + mutex_destroy(&ops->done_mtx); } /* @@ -994,9 +1079,9 @@ aiosp_ops_fini(struct aiosp_ops *ops) int aiosp_validate_conflicts(struct aioproc *aioproc, void *uptr) { - mutex_enter(&aioproc->aio_mtx); - struct aiost *st; + + mutex_enter(&aioproc->aio_mtx); TAILQ_FOREACH(st, &aioproc->aiost_total, list) { if (st->job->aiocb_uptr != uptr) { continue; @@ -1004,7 +1089,6 @@ aiosp_validate_conflicts(struct aioproc *aioproc, void *uptr) mutex_exit(&aioproc->aio_mtx); return EINVAL; } - mutex_exit(&aioproc->aio_mtx); return 0; @@ -1029,13 +1113,20 @@ aiocbp_lookup(struct aioproc *aioproc, struct aiocbp **aiocbpp, void *uptr) u_int hash; hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; - + + //printf("searching element with key {%lx} and hash {%x}\n", (uintptr_t)uptr, hash); + + mutex_enter(&aioproc->aio_mtx); TAILQ_FOREACH(aiocbp, &aioproc->aio_hash[hash], list) { if (aiocbp->uptr == uptr) { + //printf("element found {%lx} and the job {%lx} {%lx}\n", (uintptr_t)aiocbp, (uintptr_t)aiocbp->job, (uintptr_t)aiocbp->job->aiost); + *aiocbpp = aiocbp; + mutex_exit(&aioproc->aio_mtx); return 0; } } + mutex_exit(&aioproc->aio_mtx); return ENOENT; } @@ -1050,14 +1141,17 @@ aiocbp_remove(struct aioproc *aioproc, void *uptr) u_int hash; hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; - + struct aiocbp *tmp; + mutex_enter(&aioproc->aio_mtx); TAILQ_FOREACH_SAFE(aiocbp, &aioproc->aio_hash[hash], list, tmp) { if (aiocbp->uptr == uptr) { TAILQ_REMOVE(&aioproc->aio_hash[hash], aiocbp, list); + mutex_exit(&aioproc->aio_mtx); return 0; } } + mutex_exit(&aioproc->aio_mtx); return ENOENT; } @@ -1075,14 +1169,19 @@ aiocbp_insert(struct aioproc *aioproc, struct aiocbp *aiocbp) uptr = aiocbp->uptr; hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; + mutex_enter(&aioproc->aio_mtx); TAILQ_FOREACH(found, &aioproc->aio_hash[hash], list) { if (found->uptr == uptr) { found->job = aiocbp->job; + mutex_exit(&aioproc->aio_mtx); return EEXIST; } } - + + //printf("appending element with key {%x} onto 
hash {%lx} aiocbp {%lx}\n", hash, (uintptr_t)uptr, (uintptr_t)aiocbp); + TAILQ_INSERT_HEAD(&aioproc->aio_hash[hash], aiocbp, list); + mutex_exit(&aioproc->aio_mtx); return 0; } @@ -1097,7 +1196,7 @@ aiocbp_init(struct aioproc *aioproc, u_int hashsize) return EINVAL; } - aioproc->aio_hash = kmem_zalloc(hashsize * sizeof(aioproc->aio_hash), + aioproc->aio_hash = kmem_zalloc(hashsize * sizeof(*aioproc->aio_hash), KM_SLEEP); aioproc->aio_hash_mask = hashsize - 1; @@ -1122,6 +1221,7 @@ aiocbp_destroy(struct aioproc *aioproc) struct aiocbp *aiocbp; + mutex_enter(&aioproc->aio_mtx); for (size_t i = 0; i < aioproc->aio_hash_size; i++) { struct aiocbp *tmp; TAILQ_FOREACH_SAFE(aiocbp, &aioproc->aio_hash[i], list, tmp) { @@ -1130,10 +1230,10 @@ aiocbp_destroy(struct aioproc *aioproc) } } - kmem_free(aioproc->aio_hash, - aioproc->aio_hash_size * sizeof(aioproc->aio_hash)); + aioproc->aio_hash_size * sizeof(*aioproc->aio_hash)); aioproc->aio_hash = NULL; aioproc->aio_hash_mask = 0; aioproc->aio_hash_size = 0; + mutex_exit(&aioproc->aio_mtx); } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 485de35bae4ef..cbee06a03a243 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -52,7 +52,7 @@ */ struct aiocb { off_t aio_offset; /* File offset */ - volatile void *aio_buf; /* I/O buffer in process space */ + void *aio_buf; /* I/O buffer in process space */ size_t aio_nbytes; /* Length of transfer */ int aio_fildes; /* File descriptor */ int aio_lio_opcode; /* LIO opcode */ @@ -99,6 +99,7 @@ struct aio_job { void *aiocb_uptr; /* User-space pointer for identification of job */ struct proc *p; /* Process that instantiated the job */ struct aiost *aiost; /* Service thread associated with this job */ + bool completed; /* Marks the completion status of this job */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; }; @@ -117,6 +118,7 @@ struct aio_job { /* Structure for tracking the status of a collection of OPS */ struct aiosp_ops { kmutex_t mtx; /* Protects this structure */ + kmutex_t done_mtx; /* Signals when a job is complete */ kcondvar_t done_cv; /* Signals when a job is complete */ size_t completed; /* Keeps track of the number of completed jobs */ size_t total; /* Keeps track of the number of total jobs */ @@ -132,6 +134,7 @@ struct aiost { kmutex_t service_mtx; /* Signal to activate thread */ struct aio_job *job; /* Jobs associated with the thread */ struct lwp *lwp; /* Servicing thread LWP */ + kmutex_t ops_mtx; /* Protects **ops */ size_t ops_total; /* Total number of connected ops */ struct aiosp_ops **ops; /* Array of ops */ vaddr_t kbuf; /* Shared memory buffer */ From 48aa18f7cdc24a0d56b238b4e768551ea8707498 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sun, 29 Jun 2025 00:34:22 -0600 Subject: [PATCH 26/53] simplify synchronisation and bugs --- sys/kern/sys_aiosp.c | 120 ++++++++++++++++--------------------------- sys/sys/aio.h | 3 -- 2 files changed, 45 insertions(+), 78 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index e9f6f6d9a6b01..07ca835956a59 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -231,8 +231,6 @@ aiosp_distribute_jobs(struct aiosp *sp) mutex_exit(&aiost->mtx); } - mutex_exit(&sp->mtx); - for (int i = 0; i < total_dispensed; i++) { struct aiost *aiost = aiost_list[i]; mutex_enter(&aiost->mtx); @@ -241,6 +239,8 @@ aiosp_distribute_jobs(struct aiosp *sp) mutex_exit(&aiost->mtx); } + mutex_exit(&sp->mtx); + if (total_dispensed) { kmem_free(aiost_list, sizeof(*aiost_list) * total_dispensed); } @@ -316,7 +316,6 @@ aiosp_suspend(struct aioproc *aioproc, 
struct aiocb **aiocbp_list, int nent, } job = aiocbp->job; - struct aiost *aiost = aiocbp->job->aiost; if (aiost == NULL) { if (job->completed) { @@ -325,18 +324,18 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, continue; } - mutex_enter(&aiost->ops_mtx); - + mutex_enter(&aiost->mtx); if (job->completed) { - mutex_exit(&aiost->ops_mtx); ops->completed++; + mutex_exit(&aiost->mtx); continue; } if (powerof2(aiost->ops_total + 1)) { size_t old_size = aiost->ops_total ? aiosp_ops_expected(aiost->ops_total) : 0; - size_t new_size = aiosp_ops_expected(aiost->ops_total + 1); + size_t new_size = aiosp_ops_expected(aiost->ops_total + + 1); struct aiosp_ops **new_ops = kmem_zalloc(new_size * sizeof(*new_ops), KM_SLEEP); @@ -344,7 +343,8 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, if (aiost->ops && old_size > 0) { memcpy(new_ops, aiost->ops, aiost->ops_total * sizeof(*aiost->ops)); - kmem_free(aiost->ops, old_size * sizeof(*aiost->ops)); + kmem_free(aiost->ops, old_size * + sizeof(*aiost->ops)); } aiost->ops = new_ops; @@ -352,27 +352,24 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, aiost->ops[aiost->ops_total] = ops; aiost->ops_total += 1; + mutex_exit(&aiost->mtx); ops->total++; - - mutex_exit(&aiost->ops_mtx); } for (; ops->completed < target;) { - mutex_exit(&ops->mtx); - mutex_enter(&ops->done_mtx); //printf("waiting on ops %ld %ld\n", ops->completed, target); - error = cv_timedwait_sig(&ops->done_cv, &ops->done_mtx, timo); - mutex_exit(&ops->done_mtx); + error = cv_timedwait_sig(&ops->done_cv, &ops->mtx, timo); if (error) { if (error == EWOULDBLOCK) { error = SET_ERROR(EAGAIN); } + mutex_exit(&ops->mtx); aiosp_ops_fini(ops); kmem_free(ops, sizeof(*ops)); + return error; } - mutex_enter(&ops->mtx); } mutex_exit(&ops->mtx); @@ -518,8 +515,8 @@ aiosp_destroy(struct aiosp *sp) TAILQ_FOREACH_SAFE(st, &sp->freelist, list, tmp) { error = aiost_terminate(st); if (error) { - kmem_free(sp, sizeof(*sp)); mutex_exit(&sp->mtx); + kmem_free(sp, sizeof(*sp)); return error; } @@ -529,16 +526,16 @@ aiosp_destroy(struct aiosp *sp) TAILQ_FOREACH_SAFE(st, &sp->active, list, tmp) { error = aiost_terminate(st); if (error) { - kmem_free(sp, sizeof(*sp)); mutex_exit(&sp->mtx); + kmem_free(sp, sizeof(*sp)); return error; } kmem_free(st, sizeof(*st)); } - kmem_free(sp, sizeof(*sp)); mutex_exit(&sp->mtx); + kmem_free(sp, sizeof(*sp)); return 0; } @@ -579,8 +576,6 @@ aiost_create(struct aiosp *sp, struct aiost **ret) st = kmem_zalloc(sizeof(*st), KM_SLEEP); mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); - mutex_init(&st->service_mtx, MUTEX_DEFAULT, IPL_NONE); - mutex_init(&st->ops_mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&st->service_cv, "aioservice"); mutex_enter(&sp->mtx); @@ -628,43 +623,33 @@ aiost_entry(void *arg) * terminated aiost_terminate(st) unblocks those sleeping on * st->service_cv */ - for (;;) { - for (;;) { - mutex_enter(&st->mtx); - - if (st->state & AIOST_STATE_OPERATION) { - break; - } else if (st->state & AIOST_STATE_TERMINATE) { + mutex_enter(&st->mtx); + for(;;) { + for (; st->state & AIOST_STATE_NONE;) { + error = cv_wait_sig(&st->service_cv, &st->mtx); + if (error) { /* - * Remove st from the list of active service - * threads, do NOT append to the freelist, dance - * around locks, exit kthread + * Thread was interrupt. 
Check for pending exit + * or suspension */ - mutex_enter(&sp->mtx); - TAILQ_REMOVE(&sp->freelist, st, list); - sp->nthreads_free--; - mutex_exit(&sp->mtx); mutex_exit(&st->mtx); - kthread_exit(0); - } else if (st->state & AIOST_STATE_NONE) { - mutex_exit(&st->mtx); - mutex_enter(&st->service_mtx); - error = cv_wait_sig(&st->service_cv, - &st->service_mtx); - mutex_exit(&st->service_mtx); - if (error) { - /* - * Thread was interrupt. Check for - * pending exit or suspension - */ - lwp_userret(curlwp); - } - } else { - panic("aio_process: invalid aiost state {%x}\n", - st->state); + lwp_userret(curlwp); + mutex_enter(&st->mtx); } } + if (st->state & AIOST_STATE_TERMINATE) { + mutex_enter(&sp->mtx); + TAILQ_REMOVE(&sp->freelist, st, list); + sp->nthreads_free--; + mutex_exit(&sp->mtx); + mutex_exit(&st->mtx); + kthread_exit(0); + } else if ((st->state & AIOST_STATE_OPERATION) == 0) { + panic("aio_process: invalid aiost state {%x}\n", + st->state); + } + job = st->job; if (job->aio_op & (AIO_READ | AIO_WRITE)) { error = aiost_process_rw(st); @@ -674,14 +659,13 @@ aiost_entry(void *arg) panic("aio_process: invalid operation code\n"); } - if (!error) { - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); - } - job->completed = true; + job->aiost = NULL; + st->state = AIOST_STATE_NONE; + st->job = NULL; + + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); - mutex_enter(&st->ops_mtx); - //printf("I am completing an op with %ld on aiost {%lx}\n", st->ops_total, (uintptr_t)st); for (int i = 0; i < st->ops_total; i++) { struct aiosp_ops *ops = st->ops[i]; if (ops == NULL) { @@ -701,16 +685,6 @@ aiost_entry(void *arg) st->ops_total = 0; st->ops = NULL; } - mutex_exit(&st->ops_mtx); - - error = aiost_teardown(st); - if (error) { - panic("aiost_entry: aiost_teardown failure"); - } - - st->state = AIOST_STATE_NONE; - st->job = NULL; - mutex_exit(&st->mtx); /* * Remove st from list of active service threads, append to @@ -799,7 +773,7 @@ aiost_process_rw(struct aiost *aiost) } auio.uio_rw = UIO_READ; error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, - &auio, fp->f_cred, FOF_UPDATE_OFFSET); + &auio, fp->f_cred, FOF_UPDATE_OFFSET); } else { /* * Perform a Write operation @@ -813,7 +787,7 @@ aiost_process_rw(struct aiost *aiost) } auio.uio_rw = UIO_WRITE; error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, - &auio, fp->f_cred, FOF_UPDATE_OFFSET); + &auio, fp->f_cred, FOF_UPDATE_OFFSET); } fd_putfile(fd); @@ -860,10 +834,10 @@ aiost_process_sync(struct aiost *aiost) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (job->aio_op & AIO_DSYNC) { error = VOP_FSYNC(vp, fp->f_cred, - FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); + FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); } else if (job->aio_op & AIO_SYNC) { error = VOP_FSYNC(vp, fp->f_cred, - FSYNC_WAIT, 0, 0); + FSYNC_WAIT, 0, 0); } VOP_UNLOCK(vp); fd_putfile(fd); @@ -907,8 +881,6 @@ aiost_terminate(struct aiost *st) cv_destroy(&st->service_cv); mutex_destroy(&st->mtx); - mutex_destroy(&st->ops_mtx); - mutex_destroy(&st->service_mtx); kmem_free(st, sizeof(*st)); return error; @@ -1059,7 +1031,6 @@ aiosp_ops_init(struct aiosp_ops *ops) ops->total = 0; cv_init(&ops->done_cv, "aiodone"); mutex_init(&ops->mtx, MUTEX_DEFAULT, IPL_NONE); - mutex_init(&ops->done_mtx, MUTEX_DEFAULT, IPL_NONE); } /* @@ -1070,7 +1041,6 @@ aiosp_ops_fini(struct aiosp_ops *ops) { cv_destroy(&ops->done_cv); mutex_destroy(&ops->mtx); - mutex_destroy(&ops->done_mtx); } /* diff --git a/sys/sys/aio.h b/sys/sys/aio.h index cbee06a03a243..b125e59ab68c2 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -118,7 
+118,6 @@ struct aio_job { /* Structure for tracking the status of a collection of OPS */ struct aiosp_ops { kmutex_t mtx; /* Protects this structure */ - kmutex_t done_mtx; /* Signals when a job is complete */ kcondvar_t done_cv; /* Signals when a job is complete */ size_t completed; /* Keeps track of the number of completed jobs */ size_t total; /* Keeps track of the number of total jobs */ @@ -131,10 +130,8 @@ struct aiost { struct aiosp *aiosp; /* Servicing pool of this thread */ kmutex_t mtx; /* Protects this structure */ kcondvar_t service_cv; /* Signal to activate thread */ - kmutex_t service_mtx; /* Signal to activate thread */ struct aio_job *job; /* Jobs associated with the thread */ struct lwp *lwp; /* Servicing thread LWP */ - kmutex_t ops_mtx; /* Protects **ops */ size_t ops_total; /* Total number of connected ops */ struct aiosp_ops **ops; /* Array of ops */ vaddr_t kbuf; /* Shared memory buffer */ From 738731268dffd2aa1d83f115913169f3255b423f Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Tue, 1 Jul 2025 18:15:03 -0600 Subject: [PATCH 27/53] job distribution simplification and race conditions --- sys/kern/sys_aiosp.c | 103 +++++++++++++++++++------------------------ sys/sys/aio.h | 1 + 2 files changed, 47 insertions(+), 57 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 07ca835956a59..37204f0efb632 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -156,10 +156,7 @@ aiosp_modcmd(modcmd_t cmd, void *arg) int aiosp_distribute_jobs(struct aiosp *sp) { - //struct proc *p = curlwp->l_proc; - struct aiost **aiost_list; struct aio_job *job; - int total_dispensed; int error = 0; /* @@ -167,6 +164,12 @@ aiosp_distribute_jobs(struct aiosp *sp) * service threads. If it does then that means we need to create new * threads. */ + mutex_enter(&sp->mtx); + if (!sp->jobs_pending) { + mutex_exit(&sp->mtx); + return 0; + } + if (sp->jobs_pending > sp->nthreads_free) { int nthreads_new = sp->jobs_pending - sp->nthreads_free; @@ -181,21 +184,12 @@ aiosp_distribute_jobs(struct aiosp *sp) } } - if (!sp->jobs_pending) { - return 0; - } - - total_dispensed = 0; - aiost_list = kmem_zalloc(sizeof(*aiost_list) * - sp->jobs_pending, KM_SLEEP); - /* * Loop over all pending jobs and assign a thread from the freelist. * Move from freelist to active. Configure service thread to work with * respect to the job. Also signal the CV outside of sp->mtx to avoid * any shenanigans. 
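+ * (Concretely, the shenanigans avoided: signalling service_cv while
+ * still holding the mutex its waiter needs would wake the service
+ * thread only for it to block again immediately on that mutex.)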
*/ - mutex_enter(&sp->mtx); struct aio_job *tmp; TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { struct aiost *aiost = TAILQ_LAST(&sp->freelist, aiost_list); @@ -207,8 +201,6 @@ aiosp_distribute_jobs(struct aiosp *sp) error = aiost_configure(aiost, job, &aiost->kbuf); if (error) { - kmem_free(aiost_list + total_dispensed, - sizeof(*aiost_list) * sp->jobs_pending); mutex_exit(&aiost->mtx); break; } @@ -221,30 +213,19 @@ aiosp_distribute_jobs(struct aiosp *sp) TAILQ_REMOVE(&sp->jobs, job, list); - aiost->job = job; - //printf("assigning job {%lx} to aiost {%lx}\n", (uintptr_t)job, (uintptr_t)aiost); job->aiost = aiost; + aiost->job = job; + aiost->freelist = false; + aiost->state = AIOST_STATE_OPERATION; - aiost_list[total_dispensed++] = aiost; sp->jobs_pending--; mutex_exit(&aiost->mtx); - } - - for (int i = 0; i < total_dispensed; i++) { - struct aiost *aiost = aiost_list[i]; - mutex_enter(&aiost->mtx); - aiost->state = AIOST_STATE_OPERATION; cv_signal(&aiost->service_cv); - mutex_exit(&aiost->mtx); } mutex_exit(&sp->mtx); - if (total_dispensed) { - kmem_free(aiost_list, sizeof(*aiost_list) * total_dispensed); - } - return error; } @@ -317,18 +298,11 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, job = aiocbp->job; struct aiost *aiost = aiocbp->job->aiost; - if (aiost == NULL) { - if (job->completed) { - ops->completed++; - } - continue; - } + KASSERT(aiost); mutex_enter(&aiost->mtx); if (job->completed) { ops->completed++; - mutex_exit(&aiost->mtx); - continue; } if (powerof2(aiost->ops_total + 1)) { @@ -340,7 +314,7 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, struct aiosp_ops **new_ops = kmem_zalloc(new_size * sizeof(*new_ops), KM_SLEEP); - if (aiost->ops && old_size > 0) { + if (aiost->ops && old_size) { memcpy(new_ops, aiost->ops, aiost->ops_total * sizeof(*aiost->ops)); kmem_free(aiost->ops, old_size * @@ -351,7 +325,7 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, } aiost->ops[aiost->ops_total] = ops; - aiost->ops_total += 1; + aiost->ops_total++; mutex_exit(&aiost->mtx); ops->total++; } @@ -578,25 +552,21 @@ aiost_create(struct aiosp *sp, struct aiost **ret) mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&st->service_cv, "aioservice"); - mutex_enter(&sp->mtx); - int error = kthread_create(PRI_KERNEL, 0, NULL, aiost_entry, st, &st->lwp, "aio_%d_%ld", p->p_pid, sp->nthreads_total); if (error) { - mutex_exit(&sp->mtx); return error; } st->job = NULL; st->state = AIOST_STATE_NONE; st->aiosp = sp; + st->freelist = true; TAILQ_INSERT_TAIL(&sp->freelist, st, list); sp->nthreads_free++; sp->nthreads_total++; - mutex_exit(&sp->mtx); - if (ret) { *ret = st; } @@ -640,17 +610,29 @@ aiost_entry(void *arg) if (st->state & AIOST_STATE_TERMINATE) { mutex_enter(&sp->mtx); - TAILQ_REMOVE(&sp->freelist, st, list); - sp->nthreads_free--; + + if (st->freelist) { + TAILQ_REMOVE(&sp->freelist, st, list); + sp->nthreads_free--; + } else { + TAILQ_REMOVE(&sp->active, st, list); + sp->nthreads_active--; + } + + sp->nthreads_total--; + mutex_exit(&sp->mtx); mutex_exit(&st->mtx); kthread_exit(0); - } else if ((st->state & AIOST_STATE_OPERATION) == 0) { + } + + if ((st->state & AIOST_STATE_OPERATION) == 0) { panic("aio_process: invalid aiost state {%x}\n", st->state); } job = st->job; + KASSERT(job != NULL); if (job->aio_op & (AIO_READ | AIO_WRITE)) { error = aiost_process_rw(st); } else if (job->aio_op & AIO_SYNC) { @@ -660,14 +642,12 @@ aiost_entry(void *arg) } job->completed = true; - job->aiost = 
NULL; st->state = AIOST_STATE_NONE; st->job = NULL; aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); - for (int i = 0; i < st->ops_total; i++) { - struct aiosp_ops *ops = st->ops[i]; + for (int i = 0; i < st->ops_total; i++) { struct aiosp_ops *ops = st->ops[i]; if (ops == NULL) { continue; } @@ -693,12 +673,11 @@ aiost_entry(void *arg) */ mutex_enter(&sp->mtx); + st->freelist = true; + TAILQ_REMOVE(&sp->active, st, list); sp->nthreads_active--; - //printf("are we appending? {%lx}!\n", (uintptr_t)st); - // CLEAR ITSELF OUT AND/OR NULLIFY JOB->AIOST - TAILQ_INSERT_TAIL(&sp->freelist, st, list); sp->nthreads_free++; @@ -744,12 +723,14 @@ aiost_process_rw(struct aiost *aiost) if (aiocbp->aio_nbytes > SSIZE_MAX) { error = SET_ERROR(EINVAL); + printf("WHAT? %ld\n", aiocbp->aio_nbytes); goto done; } fp = fd_getfile(fd); if (fp == NULL) { error = SET_ERROR(EBADF); + //printf("is this legit? %d %d %ld\n", fd, error, aiocbp->aio_nbytes); goto done; } @@ -800,6 +781,8 @@ aiost_process_rw(struct aiost *aiost) job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; + printf("%d ", error); + return 0; } @@ -862,11 +845,14 @@ static int aiost_terminate(struct aiost *st) { int error = 0; + size_t ops_total; + struct aiosp_ops **ops; + size_t total; mutex_enter(&st->mtx); - size_t total = aiosp_ops_expected(st->ops_total); - kmem_free(st->ops, total * sizeof(*st->ops)); + ops_total = st->ops_total; + ops = st->ops; error = aiost_teardown(st); if (error) { @@ -874,6 +860,11 @@ aiost_terminate(struct aiost *st) } st->state = AIOST_STATE_TERMINATE; + if (ops && ops_total) { + total = aiosp_ops_expected(st->ops_total); + kmem_free(ops, total * sizeof(*ops)); + } + mutex_exit(&st->mtx); cv_signal(&st->service_cv); @@ -946,10 +937,8 @@ aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) upa, protections, 0); } - job->aiost = aiost; - pmap_update(pmap_kernel()); - *kbuf = kva; + *kbuf = kva + ((uintptr_t)aiocb->aio_buf & PAGE_MASK); return 0; } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index b125e59ab68c2..312580019a135 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -136,6 +136,7 @@ struct aiost { struct aiosp_ops **ops; /* Array of ops */ vaddr_t kbuf; /* Shared memory buffer */ int state; /* The state of the thread */ + bool freelist; /* Whether or not aiost is on freelist */ }; /* Structure for AIO servicing pool */ From 68e792fa4639e9194ada0de6164091eceb857690 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Tue, 1 Jul 2025 18:24:14 -0600 Subject: [PATCH 28/53] fix potential deadlock --- sys/kern/sys_aiosp.c | 24 +++++++++++++----------- sys/sys/aio.h | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 37204f0efb632..faed48fa1209d 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -1075,17 +1075,17 @@ aiocbp_lookup(struct aioproc *aioproc, struct aiocbp **aiocbpp, void *uptr) //printf("searching element with key {%lx} and hash {%x}\n", (uintptr_t)uptr, hash); - mutex_enter(&aioproc->aio_mtx); + mutex_enter(&aioproc->aio_hash_mtx); TAILQ_FOREACH(aiocbp, &aioproc->aio_hash[hash], list) { if (aiocbp->uptr == uptr) { //printf("element found {%lx} and the job {%lx} {%lx}\n", (uintptr_t)aiocbp, (uintptr_t)aiocbp->job, (uintptr_t)aiocbp->job->aiost); *aiocbpp = aiocbp; - mutex_exit(&aioproc->aio_mtx); + mutex_exit(&aioproc->aio_hash_mtx); return 0; } } - mutex_exit(&aioproc->aio_mtx); + mutex_exit(&aioproc->aio_hash_mtx); return ENOENT; } @@ -1102,15 +1102,15 @@ aiocbp_remove(struct aioproc *aioproc, void 
*uptr) hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; struct aiocbp *tmp; - mutex_enter(&aioproc->aio_mtx); + mutex_enter(&aioproc->aio_hash_mtx); TAILQ_FOREACH_SAFE(aiocbp, &aioproc->aio_hash[hash], list, tmp) { if (aiocbp->uptr == uptr) { TAILQ_REMOVE(&aioproc->aio_hash[hash], aiocbp, list); - mutex_exit(&aioproc->aio_mtx); + mutex_exit(&aioproc->aio_hash_mtx); return 0; } } - mutex_exit(&aioproc->aio_mtx); + mutex_exit(&aioproc->aio_hash_mtx); return ENOENT; } @@ -1128,11 +1128,11 @@ aiocbp_insert(struct aioproc *aioproc, struct aiocbp *aiocbp) uptr = aiocbp->uptr; hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; - mutex_enter(&aioproc->aio_mtx); + mutex_enter(&aioproc->aio_hash_mtx); TAILQ_FOREACH(found, &aioproc->aio_hash[hash], list) { if (found->uptr == uptr) { found->job = aiocbp->job; - mutex_exit(&aioproc->aio_mtx); + mutex_exit(&aioproc->aio_hash_mtx); return EEXIST; } } @@ -1140,7 +1140,7 @@ aiocbp_insert(struct aioproc *aioproc, struct aiocbp *aiocbp) //printf("appending element with key {%x} onto hash {%lx} aiocbp {%lx}\n", hash, (uintptr_t)uptr, (uintptr_t)aiocbp); TAILQ_INSERT_HEAD(&aioproc->aio_hash[hash], aiocbp, list); - mutex_exit(&aioproc->aio_mtx); + mutex_exit(&aioproc->aio_hash_mtx); return 0; } @@ -1161,6 +1161,8 @@ aiocbp_init(struct aioproc *aioproc, u_int hashsize) aioproc->aio_hash_mask = hashsize - 1; aioproc->aio_hash_size = hashsize; + mutex_init(&aioproc->aio_hash_mtx, MUTEX_DEFAULT, IPL_NONE); + for (size_t i = 0; i < hashsize; i++) { TAILQ_INIT(&aioproc->aio_hash[i]); } @@ -1180,7 +1182,7 @@ aiocbp_destroy(struct aioproc *aioproc) struct aiocbp *aiocbp; - mutex_enter(&aioproc->aio_mtx); + mutex_enter(&aioproc->aio_hash_mtx); for (size_t i = 0; i < aioproc->aio_hash_size; i++) { struct aiocbp *tmp; TAILQ_FOREACH_SAFE(aiocbp, &aioproc->aio_hash[i], list, tmp) { @@ -1194,5 +1196,5 @@ aiocbp_destroy(struct aioproc *aioproc) aioproc->aio_hash = NULL; aioproc->aio_hash_mask = 0; aioproc->aio_hash_size = 0; - mutex_exit(&aioproc->aio_mtx); + mutex_exit(&aioproc->aio_hash_mtx); } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 312580019a135..8dd044a18366b 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -175,6 +175,7 @@ struct aioproc { unsigned int jobs_count; /* Count of the jobs */ TAILQ_HEAD(, aio_job) jobs_queue;/* Queue of the AIO jobs */ struct lwp *aio_worker; /* AIO worker thread */ + kmutex_t aio_hash_mtx; /* Protects the hash table */ struct aiost_list aiost_total; /* Total list of servicing threads */ struct aiocbp_list *aio_hash; /* Aiocbp hash root */ size_t aio_hash_size; /* Total number of buckets */ From 480c228bd1f38dada0095133d7e734672b527b5b Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Mon, 7 Jul 2025 02:26:37 -0600 Subject: [PATCH 29/53] major simplification and bugs --- sys/kern/sys_aio.c | 38 ++++++---- sys/kern/sys_aiosp.c | 173 ++++++++----------------------------------- sys/sys/aio.h | 2 +- 3 files changed, 54 insertions(+), 159 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 1d0c22813cd2b..0f6fc82cc5673 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -188,10 +188,12 @@ aio_procinit(struct proc *p) aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP); /* Initialize the aiocbp hash map */ +#ifdef AIOSP error = aiocbp_init(aio, 256); if (error) { return error; } +#endif /* Initialize queue and their synchronization structures */ mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE); @@ -261,7 +263,9 @@ aio_exit(struct proc *p, void *cookie) } /* Destroy and free the entire AIO data structure */ 
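	/*
	 * (Sketch of the resulting AIOSP call flow after this patch, for
	 * orientation: sys_aio_read()/sys_aio_write() call
	 * aio_enqueue_job() to register the aiocbp and queue the job,
	 * then call aiosp_dispense_bank() themselves to kick the pool;
	 * here in aio_exit() the uptr hash is torn down alongside the
	 * rest of the per-process AIO state.)
	 */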
+#ifdef AIOSP aiocbp_destroy(aio); +#endif cv_destroy(&aio->aio_worker_cv); cv_destroy(&aio->done_cv); mutex_destroy(&aio->aio_mtx); @@ -637,11 +641,6 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) if (error) { return SET_ERROR(error); } - - error = aiosp_dispense_bank(); - if (error) { - return SET_ERROR(error); - } #else TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list); aio->jobs_count++; @@ -820,11 +819,12 @@ int sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap, register_t *retval) { - /* { - syscallarg(struct aiocb *) aiocbp; - } */ - - return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); + int error; + error = aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); +#ifdef AIOSP + error = aiosp_dispense_bank(); +#endif + return error; } int @@ -976,11 +976,12 @@ int sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap, register_t *retval) { - /* { - syscallarg(struct aiocb *) aiocbp; - } */ - - return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL); + int error; + error = aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL); +#ifdef AIOSP + error = aiosp_dispense_bank(); +#endif + return error; } int @@ -1070,6 +1071,13 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, errcnt++; } +#ifdef AIOSP + error = aiosp_dispense_bank(); + if (error) { + return error; + } +#endif + mutex_enter(&aio->aio_mtx); /* Return an error, if any */ diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index faed48fa1209d..1233cf52dc3ba 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -34,6 +34,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp $"); #include +#include #include #include #include @@ -73,16 +74,13 @@ static void aiosp_ops_fini(struct aiosp_ops *); static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); -static int aiost_configure(struct aiost *, struct aio_job *, - vaddr_t *); -static int aiost_teardown(struct aiost *); static int aiost_process_rw(struct aiost *); static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); static void aiost_sigsend(struct proc *, struct sigevent *); /* - * Tear down all service pools + * Teardown all service pools */ static int aio_fini(void) @@ -199,12 +197,6 @@ aiosp_distribute_jobs(struct aiosp *sp) mutex_enter(&aiost->mtx); - error = aiost_configure(aiost, job, &aiost->kbuf); - if (error) { - mutex_exit(&aiost->mtx); - break; - } - TAILQ_REMOVE(&sp->freelist, aiost, list); sp->nthreads_free--; @@ -305,13 +297,13 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, ops->completed++; } - if (powerof2(aiost->ops_total + 1)) { - size_t old_size = aiost->ops_total ? - aiosp_ops_expected(aiost->ops_total) : 0; - size_t new_size = aiosp_ops_expected(aiost->ops_total + - 1); + if (aiost->ops_total == aiost->ops_size) { + size_t old_size = aiost->ops_size; + size_t new_size = aiost->ops_size == 0 ? 1 : + (aiost->ops_size == 1 ? 
2 : + (aiost->ops_size * aiost->ops_size)); - struct aiosp_ops **new_ops = kmem_zalloc(new_size * + struct aiosp_ops **new_ops = kmem_zalloc(new_size * sizeof(*new_ops), KM_SLEEP); if (aiost->ops && old_size) { @@ -321,11 +313,13 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, sizeof(*aiost->ops)); } + aiost->ops_size = new_size; aiost->ops = new_ops; } aiost->ops[aiost->ops_total] = ops; aiost->ops_total++; + mutex_exit(&aiost->mtx); ops->total++; } @@ -552,12 +546,6 @@ aiost_create(struct aiosp *sp, struct aiost **ret) mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&st->service_cv, "aioservice"); - int error = kthread_create(PRI_KERNEL, 0, NULL, aiost_entry, - st, &st->lwp, "aio_%d_%ld", p->p_pid, sp->nthreads_total); - if (error) { - return error; - } - st->job = NULL; st->state = AIOST_STATE_NONE; st->aiosp = sp; @@ -567,6 +555,12 @@ aiost_create(struct aiosp *sp, struct aiost **ret) sp->nthreads_free++; sp->nthreads_total++; + int error = kthread_create(PRI_USER, 0, NULL, aiost_entry, + st, &st->lwp, "aio_%d_%ld", p->p_pid, sp->nthreads_total); + if (error) { + return error; + } + if (ret) { *ret = st; } @@ -595,7 +589,7 @@ aiost_entry(void *arg) */ mutex_enter(&st->mtx); for(;;) { - for (; st->state & AIOST_STATE_NONE;) { + for (; st->state == AIOST_STATE_NONE;) { error = cv_wait_sig(&st->service_cv, &st->mtx); if (error) { /* @@ -608,7 +602,7 @@ aiost_entry(void *arg) } } - if (st->state & AIOST_STATE_TERMINATE) { + if (st->state == AIOST_STATE_TERMINATE) { mutex_enter(&sp->mtx); if (st->freelist) { @@ -626,7 +620,7 @@ aiost_entry(void *arg) kthread_exit(0); } - if ((st->state & AIOST_STATE_OPERATION) == 0) { + if (st->state != AIOST_STATE_OPERATION) { panic("aio_process: invalid aiost state {%x}\n", st->state); } @@ -647,7 +641,8 @@ aiost_entry(void *arg) aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); - for (int i = 0; i < st->ops_total; i++) { struct aiosp_ops *ops = st->ops[i]; + for (int i = 0; i < st->ops_total; i++) { + struct aiosp_ops *ops = st->ops[i]; if (ops == NULL) { continue; } @@ -723,23 +718,22 @@ aiost_process_rw(struct aiost *aiost) if (aiocbp->aio_nbytes > SSIZE_MAX) { error = SET_ERROR(EINVAL); - printf("WHAT? %ld\n", aiocbp->aio_nbytes); goto done; } - fp = fd_getfile(fd); + fp = fd_getfile2(job->p, fd); if (fp == NULL) { error = SET_ERROR(EBADF); - //printf("is this legit? 
%d %d %ld\n", fd, error, aiocbp->aio_nbytes); goto done; } - aiov.iov_base = (void *)(uintptr_t)aiost->kbuf; + aiov.iov_base = aiocbp->aio_buf; aiov.iov_len = aiocbp->aio_nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_resid = aiocbp->aio_nbytes; - auio.uio_vmspace = NULL; + auio.uio_offset = aiocbp->aio_offset; + auio.uio_vmspace = job->p->p_vmspace; if (job->aio_op & AIO_READ) { /* @@ -748,7 +742,7 @@ aiost_process_rw(struct aiost *aiost) KASSERT((job->aio_op & AIO_WRITE) == 0); if ((fp->f_flag & FREAD) == 0) { - fd_putfile(fd); + closef(fp); error = SET_ERROR(EBADF); goto done; } @@ -762,7 +756,7 @@ aiost_process_rw(struct aiost *aiost) KASSERT(job->aio_op & AIO_WRITE); if ((fp->f_flag & FWRITE) == 0) { - fd_putfile(fd); + closef(fp); error = SET_ERROR(EBADF); goto done; } @@ -770,7 +764,7 @@ aiost_process_rw(struct aiost *aiost) error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, &auio, fp->f_cred, FOF_UPDATE_OFFSET); } - fd_putfile(fd); + closef(fp); /* * Store the result value @@ -781,8 +775,6 @@ aiost_process_rw(struct aiost *aiost) job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; - printf("%d ", error); - return 0; } @@ -854,17 +846,13 @@ aiost_terminate(struct aiost *st) ops_total = st->ops_total; ops = st->ops; - error = aiost_teardown(st); - if (error) { - return error; - } - st->state = AIOST_STATE_TERMINATE; - if (ops && ops_total) { total = aiosp_ops_expected(st->ops_total); kmem_free(ops, total * sizeof(*ops)); } + st->state = AIOST_STATE_TERMINATE; + mutex_exit(&st->mtx); cv_signal(&st->service_cv); @@ -877,107 +865,6 @@ aiost_terminate(struct aiost *st) return error; } -/* - * Configure a servicing thread to handle a specific job. Initialise operation - * and establish the 'shared' memory region. - */ -static int -aiost_configure(struct aiost *aiost, struct aio_job *job, vaddr_t *kbuf) -{ - struct vmspace *vm = job->p->p_vmspace; - struct aiocb *aiocb = &job->aiocbp; - vaddr_t uva, kva; - paddr_t upa; - int error; - bool success; - - vm_prot_t protections = VM_PROT_NONE; - if (job->aio_op == AIO_READ) { - protections = VM_PROT_READ; - } else if(job->aio_op == AIO_WRITE) { - protections = VM_PROT_READ | VM_PROT_WRITE; - } else { - return 0; - } - - /* - * To account for the case where the memory is anonymously mapped and - * has not yet been fulfilled. 
- */ - error = uvm_vslock(vm, aiocb->aio_buf, aiocb->aio_nbytes, - protections); - if (error) { - return error; - } - - kva = uvm_km_alloc(kernel_map, aiocb->aio_nbytes, 0, - UVM_KMF_VAONLY); - if (!kva) { - uvm_vsunlock(vm, aiocb->aio_buf, aiocb->aio_nbytes); - return ENOMEM; - } - - /* - * Extract physical memory and map to the kernel - */ - for (uva = trunc_page((vaddr_t)aiocb->aio_buf); - uva < round_page((vaddr_t)aiocb->aio_buf + aiocb->aio_nbytes); - uva += PAGE_SIZE) { - - success = pmap_extract(vm_map_pmap(&vm->vm_map), uva, &upa); - if (!success) { - uvm_km_free(kernel_map, kva, aiocb->aio_nbytes, - UVM_KMF_VAONLY); - uvm_vsunlock(vm, aiocb->aio_buf, - aiocb->aio_nbytes); - return EFAULT; - } - - pmap_kenter_pa(kva + (uva - trunc_page((vaddr_t)aiocb->aio_buf)), - upa, protections, 0); - } - - pmap_update(pmap_kernel()); - *kbuf = kva + ((uintptr_t)aiocb->aio_buf & PAGE_MASK); - - return 0; -} - -/* - * Free all memory and meta associated with aiost->kbuf - */ -static int -aiost_teardown(struct aiost *aiost) -{ - struct aio_job *job; - struct vmspace *vm; - struct aiocb *aiocb; - vaddr_t kva; - - job = aiost->job; - if (job == NULL) { - return 0; - } - - vm = job->p->p_vmspace; - aiocb = &job->aiocbp; - - kva = (vaddr_t)aiost->kbuf; - if (!kva) { - return 0; - } - - for (vaddr_t va = kva; va < kva + round_page(aiocb->aio_nbytes); - va += PAGE_SIZE) { - pmap_kremove(va, PAGE_SIZE); - } - - uvm_km_free(kernel_map, kva, aiocb->aio_nbytes, UVM_KMF_VAONLY); - uvm_vsunlock(vm, aiocb->aio_buf, aiocb->aio_nbytes); - - return 0; -} - /* * For major workloads that actually merit the use of asynchronous IO you can * expect an arbitrarily high number of servicing threads to spawn. Throughout diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 8dd044a18366b..31bda9a8b21d9 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -134,7 +134,7 @@ struct aiost { struct lwp *lwp; /* Servicing thread LWP */ size_t ops_total; /* Total number of connected ops */ struct aiosp_ops **ops; /* Array of ops */ - vaddr_t kbuf; /* Shared memory buffer */ + size_t ops_size; /* Size of ops array */ int state; /* The state of the thread */ bool freelist; /* Whether or not aiost is on freelist */ }; From 5e3ede6d2e2cdeeccb50b50803676dad45f1d30b Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Fri, 11 Jul 2025 18:30:49 -0600 Subject: [PATCH 30/53] simplify thread representation and job coalescing --- sys/kern/sys_aio.c | 33 ++- sys/kern/sys_aiosp.c | 598 +++++++++++++++++++------------------------ sys/sys/aio.h | 73 +++--- 3 files changed, 324 insertions(+), 380 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 0f6fc82cc5673..237f3758014fb 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -189,7 +189,12 @@ aio_procinit(struct proc *p) /* Initialize the aiocbp hash map */ #ifdef AIOSP - error = aiocbp_init(aio, 256); + error = aiosp_initialize(&aio->aiosp); + if (error) { + return error; + } + + error = aiocbp_init(&aio->aiosp, 256); if (error) { return error; } @@ -200,7 +205,6 @@ aio_procinit(struct proc *p) cv_init(&aio->aio_worker_cv, "aiowork"); cv_init(&aio->done_cv, "aiodone"); TAILQ_INIT(&aio->jobs_queue); - TAILQ_INIT(&aio->aiost_total); /* * Create an AIO worker thread. 
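(For orientation, a sketch of the ownership change this patch makes; the
embedded field is inferred from the &aio->aiosp usages in these hunks
and is not shown verbatim here:)

	struct aioproc {
		...
		struct aiosp	aiosp;	/* per-process servicing pool */
	};

Every aiosp_* entry point now receives &aio->aiosp, replacing the global
priority-indexed bank of pools.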
@@ -264,7 +268,8 @@ aio_exit(struct proc *p, void *cookie) /* Destroy and free the entire AIO data structure */ #ifdef AIOSP - aiocbp_destroy(aio); + aiocbp_destroy(&aio->aiosp); + aiosp_destroy(&aio->aiosp); #endif cv_destroy(&aio->aio_worker_cv); cv_destroy(&aio->done_cv); @@ -563,7 +568,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) aio = p->p_aio; if (aio) { #ifdef AIOSP - error = aiosp_validate_conflicts(aio, aiocb_uptr); + error = aiosp_validate_conflicts(&aio->aiosp, aiocb_uptr); #else error = aio_validate_conflicts(aio, aiocb_uptr); #endif @@ -605,7 +610,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) a_job->aiocb_uptr = aiocb_uptr; a_job->aio_op |= op; a_job->lio = lio; - a_job->aiost = NULL; + mutex_init(&a_job->mtx, MUTEX_DEFAULT, IPL_NONE); /* * Add the job to the queue, update the counters, and @@ -632,12 +637,12 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) mutex_exit(&aio->aio_mtx); - error = aiocbp_insert(aio, aiocbp); + error = aiocbp_insert(&aio->aiosp, aiocbp); if (error) { return SET_ERROR(error); } - error = aiosp_enqueue_job(a_job); + error = aiosp_enqueue_job(&aio->aiosp, a_job); if (error) { return SET_ERROR(error); } @@ -822,7 +827,9 @@ sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap, int error; error = aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); #ifdef AIOSP - error = aiosp_dispense_bank(); + struct proc *p = curlwp->l_proc; + struct aioproc *aio = p->p_aio; + error = aiosp_distribute_jobs(&aio->aiosp); #endif return error; } @@ -892,7 +899,8 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, #ifdef AIOSP struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; - error = aiosp_suspend(aio, list, nent, SCARG(uap, timeout) ? + KASSERT(aio); + error = aiosp_suspend(&aio->aiosp, list, nent, SCARG(uap, timeout) ? &ts : NULL, AIOSP_SUSPEND_ALL); #else error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? 
&ts : NULL); @@ -979,7 +987,10 @@ sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap, int error; error = aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL); #ifdef AIOSP - error = aiosp_dispense_bank(); + struct proc *p = curlwp->l_proc; + struct aioproc *aio = p->p_aio; + KASSERT(aio); + error = aiosp_distribute_jobs(&aio->aiosp); #endif return error; } @@ -1072,7 +1083,7 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, } #ifdef AIOSP - error = aiosp_dispense_bank(); + error = aiosp_distribute_jobs(&aio->aiosp); if (error) { return error; } diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 1233cf52dc3ba..5337578d4a06b 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -59,18 +59,10 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp MODULE(MODULE_CLASS_MISC, aiosp, NULL); -static kmutex_t aiospb_mtx; -static u_int aiospb_max = PRI_KTHREAD + NPRI_KTHREAD; -static struct aiosp **aiospb; - -static int aiosp_initialize(struct aiosp *, pri_t); -static int aiosp_destroy(struct aiosp *); -static int aiosp_retrieve_bank(pri_t, struct aiosp **); -static int aiosp_pri_idx(pri_t); - static size_t aiosp_ops_expected(size_t); static void aiosp_ops_init(struct aiosp_ops *); static void aiosp_ops_fini(struct aiosp_ops *); +static int aiosp_worker_extract(struct aiosp *, struct aiost **); static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); @@ -78,141 +70,131 @@ static int aiost_process_rw(struct aiost *); static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); static void aiost_sigsend(struct proc *, struct sigevent *); +static void aiost_notify_ops (struct aio_job *); /* - * Teardown all service pools + * Module interface */ static int -aio_fini(void) +aiosp_modcmd(modcmd_t cmd, void *arg) { - struct aiosp *aiosp; - int error; - - for (int i = 0; i < aiospb_max; i++) { - aiosp = aiospb[i]; - if (aiosp == NULL) { - continue; - } - - error = aiosp_destroy(aiosp); - if (error) { - return error; - } - - kmem_free(aiosp, sizeof(*aiosp)); + switch (cmd) { + case MODULE_CMD_INIT: + return 0; + case MODULE_CMD_FINI: + return 0; + default: + return SET_ERROR(ENOTTY); } - - kmem_free(aiospb, sizeof(*aiospb) * aiospb_max); - - return 0; } /* - * Initialize global service pool state + * Order RB with respect to fp */ static int -aio_init(void) +aiost_file_group_cmp(struct aiost_file_group *a, struct aiost_file_group *b) { - struct aiosp *aiosp; - int error; - - mutex_init(&aiospb_mtx, MUTEX_DEFAULT, IPL_NONE); - - aiospb = kmem_zalloc(sizeof(*aiospb) * aiospb_max, KM_SLEEP); - aiosp = kmem_zalloc(sizeof(*aiosp), KM_SLEEP); - aiospb[aiosp_pri_idx(PRI_KTHREAD)] = aiosp; - - error = aiosp_initialize(aiosp, PRI_KTHREAD); - if (error) { - return error; + if (a == NULL || b == NULL) { + return (a == b) ? 0 : (a ? 1 : -1); } - return 0; -} + uintptr_t ap = (uintptr_t)a->fp; + uintptr_t bp = (uintptr_t)b->fp; -/* - * Module interface - */ -static int -aiosp_modcmd(modcmd_t cmd, void *arg) -{ - switch (cmd) { - case MODULE_CMD_INIT: - return aio_init(); - case MODULE_CMD_FINI: - return aio_fini(); - default: - return SET_ERROR(ENOTTY); - } + return (ap < bp) ? -1 : (ap > bp) ? 1 : 0; } +RB_HEAD(aiost_file_tree, aiost_file_group); +RB_PROTOTYPE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp); +RB_GENERATE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp); + /* - * Distributes pending jobs to servicing threads. 
Allocates the requisite number - * of servicing threads, creates new threads if necessary, then assigns a single - * job to be completed by a servicing thread. + * Group jobs by file handle for coalescing and distribute them among service + * threads */ int aiosp_distribute_jobs(struct aiosp *sp) { struct aio_job *job; + struct file *fp; int error = 0; - /* - * Check to see if the number of pending jobs exceeds the number of free - * service threads. If it does then that means we need to create new - * threads. - */ mutex_enter(&sp->mtx); if (!sp->jobs_pending) { mutex_exit(&sp->mtx); return 0; } - if (sp->jobs_pending > sp->nthreads_free) { - int nthreads_new = sp->jobs_pending - sp->nthreads_free; - - for (int i = 0; i < nthreads_new; i++) { - struct aiost *aiost; + struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { + fp = fd_getfile2(job->p, job->aiocbp.aio_fildes); + if (fp == NULL) { + error = SET_ERROR(EBADF); + return error; + } - error = aiost_create(sp, &aiost); + struct aiost_file_group *fg = NULL; + struct aiost *aiost = NULL; + + if (fp->f_vnode && fp->f_vnode->v_type == VREG) { + struct aiost_file_group find = { 0 }; + find.fp = fp; + fg = RB_FIND(aiost_file_tree, sp->fg_root, &find); + + if (fg == NULL) { + fg = kmem_zalloc(sizeof(*fg), KM_SLEEP); + fg->fp = fp; + fg->vp = fp->f_vnode; + fg->queue_size = 0; + fg->refcnt = 1; + TAILQ_INIT(&fg->queue); + + error = aiosp_worker_extract(sp, &aiost); + if (error) { + kmem_free(fg, sizeof(*fg)); + closef(fp); + mutex_exit(&sp->mtx); + return error; + } + + RB_INSERT(aiost_file_tree, sp->fg_root, fg); + fg->aiost = aiost; + + aiost->fg = fg; + aiost->job = NULL; + } else { + /* + * release fp as it already exists within fg + */ + closef(fp); + aiost = fg->aiost; + } + } else { + error = aiosp_worker_extract(sp, &aiost); if (error) { + closef(fp); mutex_exit(&sp->mtx); return error; } - } - } - /* - * Loop over all pending jobs and assign a thread from the freelist. - * Move from freelist to active. Configure service thread to work with - * respect to the job. Also signal the CV outside of sp->mtx to avoid - * any shenanigans. - */ - struct aio_job *tmp; - TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { - struct aiost *aiost = TAILQ_LAST(&sp->freelist, aiost_list); - if (aiost == NULL) { - panic("aiosp_distribute_jobs: aiost is null"); + aiost->fg = NULL; + aiost->job = job; } - mutex_enter(&aiost->mtx); - - TAILQ_REMOVE(&sp->freelist, aiost, list); - sp->nthreads_free--; - - TAILQ_INSERT_TAIL(&sp->active, aiost, list); - sp->nthreads_active++; - + /* + * Move from sp->jobs to fg->jobs + */ TAILQ_REMOVE(&sp->jobs, job, list); + sp->jobs_pending--; + + if (fg) { + TAILQ_INSERT_TAIL(&fg->queue, job, list); + fg->queue_size++; + } - job->aiost = aiost; - aiost->job = job; aiost->freelist = false; aiost->state = AIOST_STATE_OPERATION; - sp->jobs_pending--; - - mutex_exit(&aiost->mtx); cv_signal(&aiost->service_cv); } @@ -230,7 +212,7 @@ aiosp_distribute_jobs(struct aiosp *sp) * incremeneted posthumously as well. 
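 *
 * Concretely (sketch): with nent == 4, AIOSP_SUSPEND_ALL, and two of
 * the listed jobs already complete at registration time, ops->completed
 * starts at 2 and the wait loop below sleeps on done_cv until the two
 * outstanding completions are counted.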
*/ int -aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, +aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, struct timespec *ts, uint32_t flags) { struct aio_job *job; @@ -277,7 +259,7 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, } struct aiocbp *aiocbp = NULL; - error = aiocbp_lookup(aioproc, &aiocbp, aiocbp_list[i]); + error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_list[i]); if (error) { mutex_exit(&ops->mtx); aiosp_ops_fini(ops); @@ -289,38 +271,36 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, } job = aiocbp->job; - struct aiost *aiost = aiocbp->job->aiost; - KASSERT(aiost); - mutex_enter(&aiost->mtx); + mutex_enter(&job->mtx); if (job->completed) { ops->completed++; } - if (aiost->ops_total == aiost->ops_size) { - size_t old_size = aiost->ops_size; - size_t new_size = aiost->ops_size == 0 ? 1 : - (aiost->ops_size == 1 ? 2 : - (aiost->ops_size * aiost->ops_size)); + if (job->ops_total <= job->ops_size) { + size_t old_size = job->ops_size; + size_t new_size = job->ops_size == 0 ? 1 : + (job->ops_size == 1 ? 2 : + (job->ops_size * job->ops_size)); struct aiosp_ops **new_ops = kmem_zalloc(new_size * sizeof(*new_ops), KM_SLEEP); - if (aiost->ops && old_size) { - memcpy(new_ops, aiost->ops, - aiost->ops_total * sizeof(*aiost->ops)); - kmem_free(aiost->ops, old_size * - sizeof(*aiost->ops)); + if (job->ops && old_size) { + memcpy(new_ops, job->ops, + job->ops_total * sizeof(*job->ops)); + kmem_free(job->ops, old_size * + sizeof(*job->ops)); } - aiost->ops_size = new_size; - aiost->ops = new_ops; + job->ops_size = new_size; + job->ops = new_ops; } - aiost->ops[aiost->ops_total] = ops; - aiost->ops_total++; + job->ops[job->ops_total] = ops; + job->ops_total++; - mutex_exit(&aiost->mtx); + mutex_exit(&job->mtx); ops->total++; } @@ -347,69 +327,22 @@ aiosp_suspend(struct aioproc *aioproc, struct aiocb **aiocbp_list, int nent, return error; } -/* - * Distribute all pending operations on all service queues attached to the - * primary bank - */ -int -aiosp_dispense_bank(void) -{ - int error; - struct aiosp *sp; - - mutex_enter(&aiospb_mtx); - - for (int i = 0; i < aiosp_pri_idx(aiospb_max); i++) { - sp = aiospb[i]; - if (sp == NULL) { - continue; - } - - error = aiosp_distribute_jobs(sp); - if (error) { - mutex_exit(&aiospb_mtx); - return error; - } - } - - mutex_exit(&aiospb_mtx); - - return 0; -} - /* * Initializes a servicing pool. */ -static int -aiosp_initialize(struct aiosp *sp, pri_t pri) +int +aiosp_initialize(struct aiosp *sp) { - sp->priority = pri; mutex_init(&sp->mtx, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&sp->freelist); TAILQ_INIT(&sp->active); TAILQ_INIT(&sp->jobs); + sp->fg_root = kmem_zalloc(sizeof(*sp->fg_root), KM_SLEEP); + RB_INIT(sp->fg_root); return 0; } -/* - * Convert a priority into an index into the service pool bank. - */ -static int -aiosp_pri_idx(pri_t pri) -{ - if (pri < PRI_KTHREAD) { - panic("aio_process: invalid priority for AIO ( aiospb_max) { - panic("aio_process: invalid priority for AIO (>NPRI_KTHREAD"); - } - - return idx; -} - /* * The size of aiost->ops scales with powers of two. The size of aiost->ops will * only either collapse to zero upon being terminated, or continue growing, so @@ -432,43 +365,37 @@ aiosp_ops_expected(size_t total) } /* - * Convert a priority into associative service pool. Initialize the pool if it - * does not yet exist. 
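+ * Take a servicing thread off the freelist, creating a new one first
+ * when no thread is idle, and move it onto the active list. The caller
+ * holds sp->mtx.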
+ * */ static int -aiosp_retrieve_bank(pri_t pri, struct aiosp **aiosp) +aiosp_worker_extract(struct aiosp *sp, struct aiost **aiost) { int error; - int bank_pri_idx; - - mutex_enter(&aiospb_mtx); - - bank_pri_idx = aiosp_pri_idx(pri); - *aiosp = aiospb[bank_pri_idx]; - if (*aiosp == NULL) { - aiospb[bank_pri_idx] = kmem_zalloc(sizeof(**aiospb), - KM_SLEEP); - *aiosp = aiospb[bank_pri_idx]; - - error = aiosp_initialize(*aiosp, pri); + if (sp->nthreads_free == 0) { + error = aiost_create(sp, aiost); if (error) { - mutex_exit(&aiospb_mtx); return error; } + } else { + *aiost = TAILQ_LAST(&sp->freelist, aiost_list); } - mutex_exit(&aiospb_mtx); + TAILQ_REMOVE(&sp->freelist, *aiost, list); + sp->nthreads_free--; + TAILQ_INSERT_TAIL(&sp->active, *aiost, list); + sp->nthreads_active++; return 0; } + /* * Each process keeps track of all the service threads instantiated to service * an asynchronous operation by the process. When a process is terminated we * must also terminate all of its active and pending asynchronous operation. */ -static int +int aiosp_destroy(struct aiosp *sp) { struct aiost *st; @@ -484,7 +411,6 @@ aiosp_destroy(struct aiosp *sp) error = aiost_terminate(st); if (error) { mutex_exit(&sp->mtx); - kmem_free(sp, sizeof(*sp)); return error; } @@ -495,7 +421,6 @@ aiosp_destroy(struct aiosp *sp) error = aiost_terminate(st); if (error) { mutex_exit(&sp->mtx); - kmem_free(sp, sizeof(*sp)); return error; } @@ -503,31 +428,24 @@ aiosp_destroy(struct aiosp *sp) } mutex_exit(&sp->mtx); - kmem_free(sp, sizeof(*sp)); + mutex_destroy(&sp->mtx); return 0; } + /* - * Enqueue a job for processing by a servicing queue + * Enqueue a job for processing by the process's servicing pool */ int -aiosp_enqueue_job(struct aio_job *job) +aiosp_enqueue_job(struct aiosp *aiosp, struct aio_job *job) { - int error; - struct aiosp *sp; - - error = aiosp_retrieve_bank(job->pri, &sp); - if (error) { - return error; - } - - mutex_enter(&sp->mtx); + mutex_enter(&aiosp->mtx); - TAILQ_INSERT_TAIL(&sp->jobs, job, list); - sp->jobs_pending++; + TAILQ_INSERT_TAIL(&aiosp->jobs, job, list); + aiosp->jobs_pending++; - mutex_exit(&sp->mtx); + mutex_exit(&aiosp->mtx); return 0; } @@ -568,6 +486,33 @@ aiost_create(struct aiosp *sp, struct aiost **ret) return 0; } +/* + * wake up anyone waiting on the completion of this job + */ +static void +aiost_notify_ops (struct aio_job *job) +{ + for (int i = 0; i < job->ops_total; i++) { + struct aiosp_ops *ops = job->ops[i]; + if (ops == NULL) { + continue; + } + + mutex_enter(&ops->mtx); + KASSERT(ops->total > ops->completed); + ops->completed++; + mutex_exit(&ops->mtx); + cv_signal(&ops->done_cv); + } + + if (job->ops && job->ops_total) { + size_t total = aiosp_ops_expected(job->ops_total); + kmem_free(job->ops, total * sizeof(*job->ops)); + job->ops_total = 0; + job->ops = NULL; + } +} + /* * Servicing thread entry point. Process the operation. Notify all those * blocking on the completion of the operation. Send a signal if necessary. 
And @@ -603,21 +548,7 @@ aiost_entry(void *arg) } if (st->state == AIOST_STATE_TERMINATE) { - mutex_enter(&sp->mtx); - - if (st->freelist) { - TAILQ_REMOVE(&sp->freelist, st, list); - sp->nthreads_free--; - } else { - TAILQ_REMOVE(&sp->active, st, list); - sp->nthreads_active--; - } - - sp->nthreads_total--; - - mutex_exit(&sp->mtx); - mutex_exit(&st->mtx); - kthread_exit(0); + break; } if (st->state != AIOST_STATE_OPERATION) { @@ -625,42 +556,52 @@ aiost_entry(void *arg) st->state); } - job = st->job; - KASSERT(job != NULL); - if (job->aio_op & (AIO_READ | AIO_WRITE)) { - error = aiost_process_rw(st); - } else if (job->aio_op & AIO_SYNC) { - error = aiost_process_sync(st); - } else { - panic("aio_process: invalid operation code\n"); - } + if (st->fg) { + struct aiost_file_group *fg = st->fg; - job->completed = true; - st->state = AIOST_STATE_NONE; - st->job = NULL; + struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { + if (job->aio_op & (AIO_READ | AIO_WRITE)) { + } else if (job->aio_op & AIO_SYNC) { + } - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + mutex_enter(&job->mtx); + job->completed = true; + mutex_exit(&job->mtx); - for (int i = 0; i < st->ops_total; i++) { - struct aiosp_ops *ops = st->ops[i]; - if (ops == NULL) { - continue; + aiost_notify_ops(job); + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + + TAILQ_REMOVE(&fg->queue, job, list); + fg->queue_size--; } - mutex_enter(&ops->mtx); - KASSERT(ops->total > ops->completed); - ops->completed++; - mutex_exit(&ops->mtx); - cv_signal(&ops->done_cv); - } + mutex_enter(&sp->mtx); + RB_REMOVE(aiost_file_tree, sp->fg_root, fg); + closef(fg->fp); + kmem_free(fg, sizeof(*fg)); + mutex_exit(&sp->mtx); + } else { + job = st->job; + KASSERT(job != NULL); + if (job->aio_op & (AIO_READ | AIO_WRITE)) { + error = aiost_process_rw(st); + } else if (job->aio_op & AIO_SYNC) { + error = aiost_process_sync(st); + } else { + panic("aio_process: invalid operation code\n"); + } + + job->completed = true; - if (st->ops && st->ops_total) { - size_t total = aiosp_ops_expected(st->ops_total); - kmem_free(st->ops, total * sizeof(*st->ops)); - st->ops_total = 0; - st->ops = NULL; + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + aiost_notify_ops(job); } + st->state = AIOST_STATE_NONE; + st->job = NULL; + st->fg = NULL; + /* * Remove st from list of active service threads, append to * freelist, dance around locks, then iterate loop and block on @@ -678,6 +619,22 @@ aiost_entry(void *arg) mutex_exit(&sp->mtx); } + + mutex_enter(&sp->mtx); + + if (st->freelist) { + TAILQ_REMOVE(&sp->freelist, st, list); + sp->nthreads_free--; + } else { + TAILQ_REMOVE(&sp->active, st, list); + sp->nthreads_active--; + } + + sp->nthreads_total--; + + mutex_exit(&sp->mtx); + mutex_exit(&st->mtx); + kthread_exit(0); } /* @@ -837,20 +794,9 @@ static int aiost_terminate(struct aiost *st) { int error = 0; - size_t ops_total; - struct aiosp_ops **ops; - size_t total; mutex_enter(&st->mtx); - ops_total = st->ops_total; - ops = st->ops; - - if (ops && ops_total) { - total = aiosp_ops_expected(st->ops_total); - kmem_free(ops, total * sizeof(*ops)); - } - st->state = AIOST_STATE_TERMINATE; mutex_exit(&st->mtx); @@ -866,39 +812,7 @@ aiost_terminate(struct aiost *st) } /* - * For major workloads that actually merit the use of asynchronous IO you can - * expect an arbitrarily high number of servicing threads to spawn. Throughout - * their lifecycle these servicing threads will remain cached within the bank to - * be pulled from when needed. 
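The consolidated exit path above means a requester only has to flip the state
and wake the thread; everything else happens in the single teardown after the
service loop. A minimal sketch of that handshake (names as in this file; the
hunk shows only the state change, so the wakeup call here is an assumption
following the mutex_exit-then-cv_signal pattern used elsewhere in the series):

	static void
	terminate_sketch(struct aiost *st)
	{
		mutex_enter(&st->mtx);
		st->state = AIOST_STATE_TERMINATE;
		mutex_exit(&st->mtx);
		cv_signal(&st->service_cv);	/* wake the service loop */
	}

The woken thread observes AIOST_STATE_TERMINATE, breaks out of its wait loop,
and runs the one list-removal/kthread_exit() teardown instead of duplicating
it inside the loop body.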
It makes sense to flush this cache routinely when - * a process terminates. All servicing threads spawned by a given process will - * be flushed when that process terminates. - */ -int -aiosp_flush(struct aioproc *aioproc) -{ - struct aiost *st; - struct aiost *tmp; - int error; - - mutex_enter(&aioproc->aio_mtx); - - TAILQ_FOREACH_SAFE(st, &aioproc->aiost_total, list, tmp) { - error = aiost_terminate(st); - if (error) { - mutex_exit(&aioproc->aio_mtx); - return error; - } - - kmem_free(st, sizeof(*st)); - } - - mutex_exit(&aioproc->aio_mtx); - - return error; -} - -/* - * initialises aiosp_ops + * Initialises aiosp_ops */ static void aiosp_ops_init(struct aiosp_ops *ops) @@ -910,7 +824,7 @@ aiosp_ops_init(struct aiosp_ops *ops) } /* - * cleans up aiosp_ops + * Cleans up aiosp_ops */ static void aiosp_ops_fini(struct aiosp_ops *ops) @@ -923,20 +837,24 @@ aiosp_ops_fini(struct aiosp_ops *ops) * Ensure that the same job can not be enqueued twice. */ int -aiosp_validate_conflicts(struct aioproc *aioproc, void *uptr) +aiosp_validate_conflicts(struct aiosp *aiosp, void *uptr) { struct aiost *st; - mutex_enter(&aioproc->aio_mtx); - TAILQ_FOREACH(st, &aioproc->aiost_total, list) { - if (st->job->aiocb_uptr != uptr) { - continue; + mutex_enter(&aiosp->mtx); + + /* check active threads */ + TAILQ_FOREACH(st, &aiosp->active, list) { + KASSERT(st->job); + if (st->job->aiocb_uptr == uptr) { + mutex_exit(&aiosp->mtx); + return EINVAL; } - mutex_exit(&aioproc->aio_mtx); - return EINVAL; } - mutex_exit(&aioproc->aio_mtx); + /* no need to check freelist threads as they have no jobs */ + + mutex_exit(&aiosp->mtx); return 0; } @@ -953,26 +871,26 @@ aiocbp_hash(void *uptr) * aiocbp hash lookup */ int -aiocbp_lookup(struct aioproc *aioproc, struct aiocbp **aiocbpp, void *uptr) +aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, void *uptr) { struct aiocbp *aiocbp; u_int hash; - hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; + hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; //printf("searching element with key {%lx} and hash {%x}\n", (uintptr_t)uptr, hash); - mutex_enter(&aioproc->aio_hash_mtx); - TAILQ_FOREACH(aiocbp, &aioproc->aio_hash[hash], list) { + mutex_enter(&aiosp->aio_hash_mtx); + TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { if (aiocbp->uptr == uptr) { //printf("element found {%lx} and the job {%lx} {%lx}\n", (uintptr_t)aiocbp, (uintptr_t)aiocbp->job, (uintptr_t)aiocbp->job->aiost); *aiocbpp = aiocbp; - mutex_exit(&aioproc->aio_hash_mtx); + mutex_exit(&aiosp->aio_hash_mtx); return 0; } } - mutex_exit(&aioproc->aio_hash_mtx); + mutex_exit(&aiosp->aio_hash_mtx); return ENOENT; } @@ -981,23 +899,23 @@ aiocbp_lookup(struct aioproc *aioproc, struct aiocbp **aiocbpp, void *uptr) * aiocbp hash removal */ int -aiocbp_remove(struct aioproc *aioproc, void *uptr) +aiocbp_remove(struct aiosp *aiosp, void *uptr) { struct aiocbp *aiocbp; u_int hash; - hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; + hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; struct aiocbp *tmp; - mutex_enter(&aioproc->aio_hash_mtx); - TAILQ_FOREACH_SAFE(aiocbp, &aioproc->aio_hash[hash], list, tmp) { + mutex_enter(&aiosp->aio_hash_mtx); + TAILQ_FOREACH_SAFE(aiocbp, &aiosp->aio_hash[hash], list, tmp) { if (aiocbp->uptr == uptr) { - TAILQ_REMOVE(&aioproc->aio_hash[hash], aiocbp, list); - mutex_exit(&aioproc->aio_hash_mtx); + TAILQ_REMOVE(&aiosp->aio_hash[hash], aiocbp, list); + mutex_exit(&aiosp->aio_hash_mtx); return 0; } } - mutex_exit(&aioproc->aio_hash_mtx); + mutex_exit(&aiosp->aio_hash_mtx); return ENOENT; } @@ 
-1006,28 +924,28 @@ aiocbp_remove(struct aioproc *aioproc, void *uptr) * aiocbp hash insertion */ int -aiocbp_insert(struct aioproc *aioproc, struct aiocbp *aiocbp) +aiocbp_insert(struct aiosp *aiosp, struct aiocbp *aiocbp) { struct aiocbp *found; void *uptr; u_int hash; uptr = aiocbp->uptr; - hash = aiocbp_hash(uptr) & aioproc->aio_hash_mask; + hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; - mutex_enter(&aioproc->aio_hash_mtx); - TAILQ_FOREACH(found, &aioproc->aio_hash[hash], list) { + mutex_enter(&aiosp->aio_hash_mtx); + TAILQ_FOREACH(found, &aiosp->aio_hash[hash], list) { if (found->uptr == uptr) { found->job = aiocbp->job; - mutex_exit(&aioproc->aio_hash_mtx); + mutex_exit(&aiosp->aio_hash_mtx); return EEXIST; } } //printf("appending element with key {%x} onto hash {%lx} aiocbp {%lx}\n", hash, (uintptr_t)uptr, (uintptr_t)aiocbp); - TAILQ_INSERT_HEAD(&aioproc->aio_hash[hash], aiocbp, list); - mutex_exit(&aioproc->aio_hash_mtx); + TAILQ_INSERT_HEAD(&aiosp->aio_hash[hash], aiocbp, list); + mutex_exit(&aiosp->aio_hash_mtx); return 0; } @@ -1036,22 +954,22 @@ aiocbp_insert(struct aioproc *aioproc, struct aiocbp *aiocbp) * aiocbp initialise */ int -aiocbp_init(struct aioproc *aioproc, u_int hashsize) +aiocbp_init(struct aiosp *aiosp, u_int hashsize) { if (!powerof2(hashsize)) { return EINVAL; } - aioproc->aio_hash = kmem_zalloc(hashsize * sizeof(*aioproc->aio_hash), + aiosp->aio_hash = kmem_zalloc(hashsize * sizeof(*aiosp->aio_hash), KM_SLEEP); - aioproc->aio_hash_mask = hashsize - 1; - aioproc->aio_hash_size = hashsize; + aiosp->aio_hash_mask = hashsize - 1; + aiosp->aio_hash_size = hashsize; - mutex_init(&aioproc->aio_hash_mtx, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&aiosp->aio_hash_mtx, MUTEX_DEFAULT, IPL_NONE); for (size_t i = 0; i < hashsize; i++) { - TAILQ_INIT(&aioproc->aio_hash[i]); + TAILQ_INIT(&aiosp->aio_hash[i]); } return 0; @@ -1061,27 +979,27 @@ aiocbp_init(struct aioproc *aioproc, u_int hashsize) * aiocbp destroy */ void -aiocbp_destroy(struct aioproc *aioproc) +aiocbp_destroy(struct aiosp *aiosp) { - if (aioproc->aio_hash == NULL) { + if (aiosp->aio_hash == NULL) { return; } struct aiocbp *aiocbp; - mutex_enter(&aioproc->aio_hash_mtx); - for (size_t i = 0; i < aioproc->aio_hash_size; i++) { + mutex_enter(&aiosp->aio_hash_mtx); + for (size_t i = 0; i < aiosp->aio_hash_size; i++) { struct aiocbp *tmp; - TAILQ_FOREACH_SAFE(aiocbp, &aioproc->aio_hash[i], list, tmp) { - TAILQ_REMOVE(&aioproc->aio_hash[i], aiocbp, list); + TAILQ_FOREACH_SAFE(aiocbp, &aiosp->aio_hash[i], list, tmp) { + TAILQ_REMOVE(&aiosp->aio_hash[i], aiocbp, list); kmem_free(aiocbp, sizeof(*aiocbp)); } } - kmem_free(aioproc->aio_hash, - aioproc->aio_hash_size * sizeof(*aioproc->aio_hash)); - aioproc->aio_hash = NULL; - aioproc->aio_hash_mask = 0; - aioproc->aio_hash_size = 0; - mutex_exit(&aioproc->aio_hash_mtx); + kmem_free(aiosp->aio_hash, + aiosp->aio_hash_size * sizeof(*aiosp->aio_hash)); + aiosp->aio_hash = NULL; + aiosp->aio_hash_mask = 0; + aiosp->aio_hash_size = 0; + mutex_exit(&aiosp->aio_hash_mtx); } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 31bda9a8b21d9..c9cf35257731c 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -31,6 +31,7 @@ #include #include +#include /* Returned by aio_cancel() */ #define AIO_CANCELED 0x1 @@ -93,13 +94,16 @@ struct aiocb { /* Structure of AIO job */ struct aiost; struct aio_job { + kmutex_t mtx; /* Protects this structure */ int aio_op; /* Operation code */ struct aiocb aiocbp; /* AIO data structure */ pri_t pri; /* Job priority */ void *aiocb_uptr; /* User-space 
pointer for identification of job */ struct proc *p; /* Process that instantiated the job */ - struct aiost *aiost; /* Service thread associated with this job */ bool completed; /* Marks the completion status of this job */ + struct aiosp_ops **ops; /* Array of ops */ + size_t ops_size; /* Size of ops array */ + size_t ops_total; /* Total number of connected ops */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; }; @@ -123,6 +127,17 @@ struct aiosp_ops { size_t total; /* Keeps track of the number of total jobs */ }; +struct aiost; +struct aiost_file_group { + RB_ENTRY(aiost_file_group) tree; + struct file *fp; + struct vnode *vp; + struct aiost *aiost; + TAILQ_HEAD(, aio_job) queue; + size_t queue_size; + int refcnt; +}; + /* Structure for AIO servicing thread */ struct aiosp; struct aiost { @@ -130,17 +145,23 @@ struct aiost { struct aiosp *aiosp; /* Servicing pool of this thread */ kmutex_t mtx; /* Protects this structure */ kcondvar_t service_cv; /* Signal to activate thread */ - struct aio_job *job; /* Jobs associated with the thread */ struct lwp *lwp; /* Servicing thread LWP */ - size_t ops_total; /* Total number of connected ops */ - struct aiosp_ops **ops; /* Array of ops */ - size_t ops_size; /* Size of ops array */ int state; /* The state of the thread */ bool freelist; /* Whether or not aiost is on freelist */ + struct aiost_file_group *fg; /* File group associated with the thread */ + struct aio_job *job; /* Singleton job */ +}; + +struct aiocbp { + TAILQ_ENTRY(aiocbp) list; + void *uptr; + struct aio_job *job; }; /* Structure for AIO servicing pool */ TAILQ_HEAD(aiost_list, aiost); +TAILQ_HEAD(aiocbp_list, aiocbp); +struct aiost_file_tree; struct aiosp { struct aiost_list freelist; /* Available service threads */ size_t nthreads_free; /* Length of freelist */ @@ -150,13 +171,11 @@ struct aiosp { size_t jobs_pending; /* Number of pending jobs */ kmutex_t mtx; /* Protects structure */ size_t nthreads_total; /* Number of total servicing threads */ - pri_t priority; /* Thread priority of the pool */ -}; - -struct aiocbp { - TAILQ_ENTRY(aiocbp) list; - void *uptr; - struct aio_job *job; + struct aiocbp_list *aio_hash; /* Aiocbp hash root */ + kmutex_t aio_hash_mtx; /* Protects the hash table */ + size_t aio_hash_size; /* Total number of buckets */ + u_int aio_hash_mask; /* Hash mask */ + struct aiost_file_tree *fg_root;/* RB tree of file groups */ }; /* LIO structure */ @@ -166,7 +185,6 @@ struct lio_req { }; /* Structure of AIO data for process */ -TAILQ_HEAD(aiocbp_list, aiocbp); struct aioproc { kmutex_t aio_mtx; /* Protects the entire structure */ kcondvar_t aio_worker_cv; /* Signals on a new job */ @@ -175,11 +193,7 @@ struct aioproc { unsigned int jobs_count; /* Count of the jobs */ TAILQ_HEAD(, aio_job) jobs_queue;/* Queue of the AIO jobs */ struct lwp *aio_worker; /* AIO worker thread */ - kmutex_t aio_hash_mtx; /* Protects the hash table */ - struct aiost_list aiost_total; /* Total list of servicing threads */ - struct aiocbp_list *aio_hash; /* Aiocbp hash root */ - size_t aio_hash_size; /* Total number of buckets */ - u_int aio_hash_mask; /* Hash mask */ + struct aiosp aiosp; /* Per-process service pool */ }; extern u_int aio_listio_max; @@ -191,19 +205,20 @@ extern u_int aio_listio_max; void aio_print_jobs(void (*)(const char *, ...) 
__printflike(1, 2)); int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *); +int aiosp_initialize(struct aiosp *); +int aiosp_destroy(struct aiosp *); int aiosp_distribute_jobs(struct aiosp *); -int aiosp_dispense_bank(void); -int aiosp_enqueue_job(struct aio_job *); -int aiosp_suspend(struct aioproc *, struct aiocb **, int, struct timespec *, +int aiosp_enqueue_job(struct aiosp *, struct aio_job *); +int aiosp_suspend(struct aiosp *, struct aiocb **, int, struct timespec *, uint32_t); -int aiosp_flush(struct aioproc *); -int aiosp_validate_conflicts(struct aioproc *, void *); - -void aiocbp_destroy(struct aioproc *); -int aiocbp_init(struct aioproc *, u_int); -int aiocbp_lookup(struct aioproc *, struct aiocbp **, void *); -int aiocbp_remove(struct aioproc *, void *); -int aiocbp_insert(struct aioproc *, struct aiocbp *); +int aiosp_flush(struct aiosp *); +int aiosp_validate_conflicts(struct aiosp *, void *); + +void aiocbp_destroy(struct aiosp *); +int aiocbp_init(struct aiosp *, u_int); +int aiocbp_lookup(struct aiosp *, struct aiocbp **, void *); +int aiocbp_remove(struct aiosp *, void *); +int aiocbp_insert(struct aiosp *, struct aiocbp *); #endif /* _KERNEL */ From ec5fb559478bc231e028aea5dac17b9733893fd8 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sun, 13 Jul 2025 21:04:05 -0600 Subject: [PATCH 31/53] miscellaneous bugs --- sys/kern/sys_aiosp.c | 30 ++++++++++++++++++++++++++---- sys/sys/aio.h | 1 - 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 5337578d4a06b..d839c8a1f7baa 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -129,6 +129,7 @@ aiosp_distribute_jobs(struct aiosp *sp) TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { fp = fd_getfile2(job->p, job->aiocbp.aio_fildes); if (fp == NULL) { + mutex_exit(&sp->mtx); error = SET_ERROR(EBADF); return error; } @@ -146,7 +147,6 @@ aiosp_distribute_jobs(struct aiosp *sp) fg->fp = fp; fg->vp = fp->f_vnode; fg->queue_size = 0; - fg->refcnt = 1; TAILQ_INIT(&fg->queue); error = aiosp_worker_extract(sp, &aiost); @@ -227,7 +227,8 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, } if (timo <= 0) { - return SET_ERROR(EAGAIN); + error = SET_ERROR(EAGAIN); + return error; } } else { timo = 0; @@ -277,7 +278,7 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, ops->completed++; } - if (job->ops_total <= job->ops_size) { + if (job->ops_total >= job->ops_size) { size_t old_size = job->ops_size; size_t new_size = job->ops_size == 0 ? 1 : (job->ops_size == 1 ? 
2 : @@ -562,7 +563,9 @@ aiost_entry(void *arg) struct aio_job *tmp; TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { if (job->aio_op & (AIO_READ | AIO_WRITE)) { + // implement and call io_read/write } else if (job->aio_op & AIO_SYNC) { + // implement and call io_sync } mutex_enter(&job->mtx); @@ -620,6 +623,26 @@ aiost_entry(void *arg) mutex_exit(&sp->mtx); } + if (st->fg) { + struct aiost_file_group *fg = st->fg; + + struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { + mutex_enter(&job->mtx); + job->completed = true; + mutex_exit(&job->mtx); + + // CONFIRM WHETHER OR NOT THIS IS EXPECTED BEHAVIOUR + aiost_notify_ops(job); + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + + TAILQ_REMOVE(&fg->queue, job, list); + fg->queue_size--; + } + + kmem_free(fg, sizeof(*fg)); + } + mutex_enter(&sp->mtx); if (st->freelist) { @@ -806,7 +829,6 @@ aiost_terminate(struct aiost *st) cv_destroy(&st->service_cv); mutex_destroy(&st->mtx); - kmem_free(st, sizeof(*st)); return error; } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index c9cf35257731c..c8dad356fb56f 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -135,7 +135,6 @@ struct aiost_file_group { struct aiost *aiost; TAILQ_HEAD(, aio_job) queue; size_t queue_size; - int refcnt; }; /* Structure for AIO servicing thread */ From 48a1f0127dd5742c45620d54b521fc6b98b33b01 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Mon, 14 Jul 2025 00:07:26 -0600 Subject: [PATCH 32/53] io rw fallback --- sys/kern/sys_aiosp.c | 184 ++++++++++++++++++++++++++++++------------- 1 file changed, 131 insertions(+), 53 deletions(-) diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index d839c8a1f7baa..dc0f350b8b960 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -66,12 +66,18 @@ static int aiosp_worker_extract(struct aiosp *, struct aiost **); static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); -static int aiost_process_rw(struct aiost *); -static int aiost_process_sync(struct aiost *); static void aiost_entry(void *); static void aiost_sigsend(struct proc *, struct sigevent *); static void aiost_notify_ops (struct aio_job *); +static int io_write(struct aiost *); +static int io_read(struct aiost *); +static int io_sync(struct aiost *); +static int io_construct(struct aio_job *, struct file **, + struct iovec *, struct uio *); +static int io_write_fallback(struct aio_job *); +static int io_read_fallback(struct aio_job *); + /* * Module interface */ @@ -562,10 +568,14 @@ aiost_entry(void *arg) struct aio_job *tmp; TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { - if (job->aio_op & (AIO_READ | AIO_WRITE)) { - // implement and call io_read/write + if (job->aio_op & AIO_READ) { + error = io_read(st); + } else if (job->aio_op & AIO_WRITE) { + error = io_write(st); } else if (job->aio_op & AIO_SYNC) { - // implement and call io_sync + error = io_sync(st); + } else { + panic("aio_process: invalid operation code\n"); } mutex_enter(&job->mtx); @@ -587,10 +597,12 @@ aiost_entry(void *arg) } else { job = st->job; KASSERT(job != NULL); - if (job->aio_op & (AIO_READ | AIO_WRITE)) { - error = aiost_process_rw(st); + if (job->aio_op & AIO_READ) { + error = io_read_fallback(job); + } else if (job->aio_op & AIO_WRITE) { + error = io_write_fallback(job); } else if (job->aio_op & AIO_SYNC) { - error = aiost_process_sync(st); + error = io_sync(st); } else { panic("aio_process: invalid operation code\n"); } @@ -682,68 +694,134 @@ aiost_sigsend(struct proc *p, struct sigevent *sig) } /* - * processes a 
read/write asynchronous operations + * */ static int -aiost_process_rw(struct aiost *aiost) +io_write(struct aiost *aiost) +{ + return 0; +} + +/* + * + */ +static int +io_read(struct aiost *aiost) +{ + return 0; +} + +/* + * + */ +static int +io_construct(struct aio_job *job, struct file **fp, struct iovec *aiov, + struct uio *auio) { - struct aio_job *job = aiost->job; struct aiocb *aiocbp = &job->aiocbp; - struct file *fp; int fd = aiocbp->aio_fildes; int error = 0; - struct iovec aiov; - struct uio auio; - if (aiocbp->aio_nbytes > SSIZE_MAX) { error = SET_ERROR(EINVAL); - goto done; + return error; } - fp = fd_getfile2(job->p, fd); - if (fp == NULL) { + *fp = fd_getfile2(job->p, fd); + if (*fp == NULL) { error = SET_ERROR(EBADF); + return error; + } + + aiov->iov_base = aiocbp->aio_buf; + aiov->iov_len = aiocbp->aio_nbytes; + auio->uio_iov = aiov; + auio->uio_iovcnt = 1; + auio->uio_resid = aiocbp->aio_nbytes; + auio->uio_offset = aiocbp->aio_offset; + auio->uio_vmspace = job->p->p_vmspace; + + return 0; +} + +/* + * + */ +static int +io_write_fallback(struct aio_job *job) +{ + struct file *fp; + struct iovec aiov; + struct uio auio; + struct aiocb *aiocbp; + int error; + + error = io_construct(job, &fp, &aiov, &auio); + if (error) { goto done; } - aiov.iov_base = aiocbp->aio_buf; - aiov.iov_len = aiocbp->aio_nbytes; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_resid = aiocbp->aio_nbytes; - auio.uio_offset = aiocbp->aio_offset; - auio.uio_vmspace = job->p->p_vmspace; + /* + * Perform write + */ + aiocbp = &job->aiocbp; + KASSERT(job->aio_op & AIO_WRITE); - if (job->aio_op & AIO_READ) { - /* - * Perform a Read operation - */ - KASSERT((job->aio_op & AIO_WRITE) == 0); + if ((fp->f_flag & FWRITE) == 0) { + closef(fp); + error = SET_ERROR(EBADF); + goto done; + } + auio.uio_rw = UIO_WRITE; + error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, + &auio, fp->f_cred, FOF_UPDATE_OFFSET); - if ((fp->f_flag & FREAD) == 0) { - closef(fp); - error = SET_ERROR(EBADF); - goto done; - } - auio.uio_rw = UIO_READ; - error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, - &auio, fp->f_cred, FOF_UPDATE_OFFSET); - } else { - /* - * Perform a Write operation - */ - KASSERT(job->aio_op & AIO_WRITE); + closef(fp); - if ((fp->f_flag & FWRITE) == 0) { - closef(fp); - error = SET_ERROR(EBADF); - goto done; - } - auio.uio_rw = UIO_WRITE; - error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, - &auio, fp->f_cred, FOF_UPDATE_OFFSET); + /* + * Store the result value + */ + job->aiocbp.aio_nbytes -= auio.uio_resid; + job->aiocbp._retval = (error == 0) ? 
job->aiocbp.aio_nbytes : -1; +done: + job->aiocbp._errno = error; + job->aiocbp._state = JOB_DONE; + + return 0; +} + +/* + * + */ +static int +io_read_fallback(struct aio_job *job) +{ + struct file *fp; + struct iovec aiov; + struct uio auio; + struct aiocb *aiocbp; + int error; + + error = io_construct(job, &fp, &aiov, &auio); + if (error) { + goto done; } + + /* + * Perform read + */ + aiocbp = &job->aiocbp; + KASSERT((job->aio_op & AIO_WRITE) == 0); + + if ((fp->f_flag & FREAD) == 0) { + closef(fp); + error = SET_ERROR(EBADF); + goto done; + } + auio.uio_rw = UIO_READ; + error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, + &auio, fp->f_cred, FOF_UPDATE_OFFSET); + closef(fp); /* @@ -759,10 +837,10 @@ aiost_process_rw(struct aiost *aiost) } /* - * processes a sync/dsync asynchronous operations + * process sync/dsync */ static int -aiost_process_sync(struct aiost *aiost) +io_sync(struct aiost *aiost) { struct aio_job *job = aiost->job; struct aiocb *aiocbp = &job->aiocbp; From 2e907f2178140ad8f228af143638e900e27baaf3 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Wed, 30 Jul 2025 01:20:29 -0600 Subject: [PATCH 33/53] aiowaitgroup/suspend reorganisation --- sys/kern/sys_aio.c | 19 +- sys/kern/sys_aiosp.c | 427 ++++++++++++++++++++++++------------------- sys/sys/aio.h | 41 +++-- 3 files changed, 286 insertions(+), 201 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 237f3758014fb..f8451814b8a6d 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -200,6 +200,8 @@ aio_procinit(struct proc *p) } #endif + printf("doing this?\n"); + /* Initialize queue and their synchronization structures */ mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&aio->aio_worker_cv, "aiowork"); @@ -334,6 +336,8 @@ aio_worker(void *arg) (void)copyout(&a_job->aiocbp, a_job->aiocb_uptr, sizeof(struct aiocb)); + printf("I am looking to read this timestamp!\n"); + mutex_enter(&aio->aio_mtx); KASSERT(aio->curjob == a_job); aio->curjob = NULL; @@ -355,6 +359,10 @@ aio_worker(void *arg) pool_put(&aio_lio_pool, lio); } + mutex_destroy(&a_job->mtx); +#ifdef AIOSP + aiowaitgrouplk_fini(&a_job->lk); +#endif /* Destroy the job */ pool_put(&aio_job_pool, a_job); } @@ -611,6 +619,9 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) a_job->aio_op |= op; a_job->lio = lio; mutex_init(&a_job->mtx, MUTEX_DEFAULT, IPL_NONE); +#ifdef AIOSP + aiowaitgrouplk_init(&a_job->lk); +#endif /* * Add the job to the queue, update the counters, and @@ -651,8 +662,8 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) aio->jobs_count++; if (lio) lio->refcnt++; - cv_signal(&aio->aio_worker_cv); mutex_exit(&aio->aio_mtx); + cv_signal(&aio->aio_worker_cv); #endif /* * One would handle the errors only with aio_error() function. @@ -794,6 +805,8 @@ sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap, if (error) return error; + printf("%d %d\n", aiocbp._state == JOB_NONE, aiocbp._state == JOB_DONE); + if (aiocbp._state == JOB_NONE) return SET_ERROR(EINVAL); @@ -853,6 +866,8 @@ sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, if (error) return error; + printf("inside kernel %d %d\n", aiocbp._errno == EINPROGRESS, aiocbp._state != JOB_DONE); + if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE) return SET_ERROR(EINVAL); @@ -901,7 +916,7 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, struct aioproc *aio = p->p_aio; KASSERT(aio); error = aiosp_suspend(&aio->aiosp, list, nent, SCARG(uap, timeout) ? 
- &ts : NULL, AIOSP_SUSPEND_ALL); + &ts : NULL, AIOSP_SUSPEND_ANY); #else error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL); #endif diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index dc0f350b8b960..1baef56fbbea6 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -59,19 +59,14 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp MODULE(MODULE_CLASS_MISC, aiosp, NULL); -static size_t aiosp_ops_expected(size_t); -static void aiosp_ops_init(struct aiosp_ops *); -static void aiosp_ops_fini(struct aiosp_ops *); -static int aiosp_worker_extract(struct aiosp *, struct aiost **); - static int aiost_create(struct aiosp *, struct aiost **); static int aiost_terminate(struct aiost *); static void aiost_entry(void *); static void aiost_sigsend(struct proc *, struct sigevent *); -static void aiost_notify_ops (struct aio_job *); +static int aiosp_worker_extract(struct aiosp *, struct aiost **); -static int io_write(struct aiost *); -static int io_read(struct aiost *); +static int io_write(struct aiost *, struct aio_job *); +static int io_read(struct aiost *, struct aio_job *); static int io_sync(struct aiost *); static int io_construct(struct aio_job *, struct file **, struct iovec *, struct uio *); @@ -222,7 +217,7 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, struct timespec *ts, uint32_t flags) { struct aio_job *job; - int error; + int error = 0; int timo; size_t target = 0; @@ -248,8 +243,8 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, target = AIOSP_SUSPEND_NEXTRACT(flags); } - struct aiosp_ops *ops = kmem_zalloc(sizeof(*ops), KM_SLEEP); - aiosp_ops_init(ops); + struct aiowaitgroup *wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); + aiowaitgroup_init(wg); /* * We want a hash table that tracks jobs, using uptr as a key. We use @@ -259,7 +254,7 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, * both aiosts are assigned to both threads. */ - mutex_enter(&ops->mtx); + mutex_enter(&wg->mtx); for (int i = 0; i < nent; i++) { if (aiocbp_list[i] == NULL) { continue; @@ -268,68 +263,36 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, struct aiocbp *aiocbp = NULL; error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_list[i]); if (error) { - mutex_exit(&ops->mtx); - aiosp_ops_fini(ops); - kmem_free(ops, sizeof(*ops)); - return error; + goto done; } if (aiocbp == NULL) { continue; } job = aiocbp->job; + KASSERT(job); mutex_enter(&job->mtx); if (job->completed) { - ops->completed++; - } - - if (job->ops_total >= job->ops_size) { - size_t old_size = job->ops_size; - size_t new_size = job->ops_size == 0 ? 1 : - (job->ops_size == 1 ? 
2 : - (job->ops_size * job->ops_size)); - - struct aiosp_ops **new_ops = kmem_zalloc(new_size * - sizeof(*new_ops), KM_SLEEP); - - if (job->ops && old_size) { - memcpy(new_ops, job->ops, - job->ops_total * sizeof(*job->ops)); - kmem_free(job->ops, old_size * - sizeof(*job->ops)); - } - - job->ops_size = new_size; - job->ops = new_ops; + wg->completed++; + } else { + printf("attaching to job %lx\n", (uintptr_t)&job->lk); + aiowaitgroup_join(wg, &job->lk); } - - job->ops[job->ops_total] = ops; - job->ops_total++; - mutex_exit(&job->mtx); - ops->total++; } - for (; ops->completed < target;) { - //printf("waiting on ops %ld %ld\n", ops->completed, target); - error = cv_timedwait_sig(&ops->done_cv, &ops->mtx, timo); + for (; wg->completed < target;) { + error = aiowaitgroup_wait(wg, timo); if (error) { - if (error == EWOULDBLOCK) { - error = SET_ERROR(EAGAIN); - } - - mutex_exit(&ops->mtx); - aiosp_ops_fini(ops); - kmem_free(ops, sizeof(*ops)); - - return error; + goto done; } } - mutex_exit(&ops->mtx); - aiosp_ops_fini(ops); - kmem_free(ops, sizeof(*ops)); +done: + mutex_exit(&wg->mtx); + wg->refcnt--; + wg->active = false; return error; } @@ -350,27 +313,6 @@ aiosp_initialize(struct aiosp *sp) return 0; } -/* - * The size of aiost->ops scales with powers of two. The size of aiost->ops will - * only either collapse to zero upon being terminated, or continue growing, so - * scaling by a power of two is simple enough. - */ -static size_t -aiosp_ops_expected(size_t total) -{ - if (total <= 1) { - return 1; - } - - total -= 1; - for (int j = 0; j < ilog2(sizeof(total) * 8); j++) { - total |= total >> (1 << j); - } - total += 1; - - return total; -} - /* * */ @@ -494,30 +436,68 @@ aiost_create(struct aiosp *sp, struct aiost **ret) } /* - * wake up anyone waiting on the completion of this job + * + */ +static void +aiost_process_singleton (struct aiost *st) +{ + struct aio_job *job; + + job = st->job; + KASSERT(job != NULL); + if (job->aio_op & AIO_READ) { + io_read_fallback(job); + } else if (job->aio_op & AIO_WRITE) { + io_write_fallback(job); + } else if (job->aio_op & AIO_SYNC) { + io_sync(st); + } else { + panic("aio_process: invalid operation code\n"); + } + + mutex_enter(&job->mtx); + job->completed = true; + aiowaitgrouplk_flush(&job->lk); + mutex_exit(&job->mtx); + + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); +} + +/* + * */ static void -aiost_notify_ops (struct aio_job *job) +aiost_process_fg (struct aiost *st) { - for (int i = 0; i < job->ops_total; i++) { - struct aiosp_ops *ops = job->ops[i]; - if (ops == NULL) { - continue; + struct aiosp *sp = st->aiosp; + struct aiost_file_group *fg = st->fg; + struct aio_job *job; + + struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { + if (job->aio_op & AIO_READ) { + io_read(st, job); + } else if (job->aio_op & AIO_WRITE) { + io_write(st, job); + } else if (job->aio_op & AIO_SYNC) { + io_sync(st); + } else { + panic("aio_process: invalid operation code\n"); } - mutex_enter(&ops->mtx); - KASSERT(ops->total > ops->completed); - ops->completed++; - mutex_exit(&ops->mtx); - cv_signal(&ops->done_cv); - } + mutex_enter(&job->mtx); + job->completed = true; + aiowaitgrouplk_flush(&job->lk); + mutex_exit(&job->mtx); - if (job->ops && job->ops_total) { - size_t total = aiosp_ops_expected(job->ops_total); - kmem_free(job->ops, total * sizeof(*job->ops)); - job->ops_total = 0; - job->ops = NULL; + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); } + + mutex_enter(&sp->mtx); + RB_REMOVE(aiost_file_tree, sp->fg_root, fg); + 
closef(fg->fp); + kmem_free(fg, sizeof(*fg)); + mutex_exit(&sp->mtx); } /* @@ -530,7 +510,6 @@ aiost_entry(void *arg) { struct aiost *st = arg; struct aiosp *sp = st->aiosp; - struct aio_job *job; int error; /* @@ -563,56 +542,29 @@ aiost_entry(void *arg) st->state); } - if (st->fg) { - struct aiost_file_group *fg = st->fg; - - struct aio_job *tmp; - TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { - if (job->aio_op & AIO_READ) { - error = io_read(st); - } else if (job->aio_op & AIO_WRITE) { - error = io_write(st); - } else if (job->aio_op & AIO_SYNC) { - error = io_sync(st); - } else { - panic("aio_process: invalid operation code\n"); - } - - mutex_enter(&job->mtx); - job->completed = true; - mutex_exit(&job->mtx); + // A MORE LOGICAL SOLUTION FILE GROUPS ARE JUST LISTIO INSIDE + // THE KERNEL (OR CAN THEY NOT BE??? HOW ABOUT ADD EXTRA + // FUNCTIONALITY TO THEM LIKE BEING ABLE TO DYNAMICALLY APPEND + // NEW OPS WHILE EVERYTHING IS IN THE MIDDLE OF BEING + // PROCESSED? NO IT IS NOT. IT IS ABOUT COMBINING OBJECTS THAT + // HAVE TO BLOCK VERSUS OBJECTS THAT DO. ALSO COMBINE AIOSP AND + // AIO TOGETHER THEIR SEPARATENESS IS GETTING ON MY NERVES + // STRIP AWAY USELESS MUMBO AI JUMBO AND MAKE WORK I SHOULD BE + // ABLE TO ACHIEVE CONCURRENCY ACROSS MULTIPLE FILES + // SIMPLIFY AND STREAMLINE DESIGN AND DOCUMENT WHEN NECESSARY + // SEND AN EMAIL OFF TO JASON AND CHISTOS + // IMPLEMENT RANGE LOCKS #1 + + //printf("%d %d\n", mutex_owned(&sp->mtx), mutex_owned(&st->mtx)); - aiost_notify_ops(job); - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); - - TAILQ_REMOVE(&fg->queue, job, list); - fg->queue_size--; - } - - mutex_enter(&sp->mtx); - RB_REMOVE(aiost_file_tree, sp->fg_root, fg); - closef(fg->fp); - kmem_free(fg, sizeof(*fg)); - mutex_exit(&sp->mtx); + if (st->fg) { + aiost_process_fg(st); } else { - job = st->job; - KASSERT(job != NULL); - if (job->aio_op & AIO_READ) { - error = io_read_fallback(job); - } else if (job->aio_op & AIO_WRITE) { - error = io_write_fallback(job); - } else if (job->aio_op & AIO_SYNC) { - error = io_sync(st); - } else { - panic("aio_process: invalid operation code\n"); - } - - job->completed = true; - - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); - aiost_notify_ops(job); + aiost_process_singleton(st); } + printf("finished!!!\n"); + st->state = AIOST_STATE_NONE; st->job = NULL; st->fg = NULL; @@ -635,26 +587,6 @@ aiost_entry(void *arg) mutex_exit(&sp->mtx); } - if (st->fg) { - struct aiost_file_group *fg = st->fg; - - struct aio_job *tmp; - TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { - mutex_enter(&job->mtx); - job->completed = true; - mutex_exit(&job->mtx); - - // CONFIRM WHETHER OR NOT THIS IS EXPECTED BEHAVIOUR - aiost_notify_ops(job); - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); - - TAILQ_REMOVE(&fg->queue, job, list); - fg->queue_size--; - } - - kmem_free(fg, sizeof(*fg)); - } - mutex_enter(&sp->mtx); if (st->freelist) { @@ -697,18 +629,18 @@ aiost_sigsend(struct proc *p, struct sigevent *sig) * */ static int -io_write(struct aiost *aiost) +io_write(struct aiost *aiost, struct aio_job *job) { - return 0; + return io_write_fallback(job); } /* * */ static int -io_read(struct aiost *aiost) +io_read(struct aiost *aiost, struct aio_job *job) { - return 0; + return io_read_fallback(job); } /* @@ -787,6 +719,9 @@ io_write_fallback(struct aio_job *job) job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; + copyout(&job->aiocbp, job->aiocb_uptr, + sizeof(struct aiocb)); + return 0; } @@ -833,6 +768,9 @@ io_read_fallback(struct aio_job *job) 
job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; + copyout(&job->aiocbp, job->aiocb_uptr, + sizeof(struct aiocb)); + return 0; } @@ -883,6 +821,9 @@ io_sync(struct aiost *aiost) job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; + copyout(&job->aiocbp, job->aiocb_uptr, + sizeof(struct aiocb)); + return 0; } @@ -911,28 +852,6 @@ aiost_terminate(struct aiost *st) return error; } -/* - * Initialises aiosp_ops - */ -static void -aiosp_ops_init(struct aiosp_ops *ops) -{ - ops->completed = 0; - ops->total = 0; - cv_init(&ops->done_cv, "aiodone"); - mutex_init(&ops->mtx, MUTEX_DEFAULT, IPL_NONE); -} - -/* - * Cleans up aiosp_ops - */ -static void -aiosp_ops_fini(struct aiosp_ops *ops) -{ - cv_destroy(&ops->done_cv); - mutex_destroy(&ops->mtx); -} - /* * Ensure that the same job can not be enqueued twice. */ @@ -1103,3 +1022,135 @@ aiocbp_destroy(struct aiosp *aiosp) aiosp->aio_hash_size = 0; mutex_exit(&aiosp->aio_hash_mtx); } + +/* + * + */ +void +aiowaitgroup_init(struct aiowaitgroup *wg) +{ + wg->completed = 0; + wg->total = 0; + wg->refcnt = 1; + wg->active = true; + cv_init(&wg->done_cv, "aiodone"); + mutex_init(&wg->mtx, MUTEX_DEFAULT, IPL_NONE); +} + +/* + * + */ +void +aiowaitgroup_fini(struct aiowaitgroup *wg) +{ + cv_destroy(&wg->done_cv); + mutex_destroy(&wg->mtx); + kmem_free(wg, sizeof(*wg)); +} + +/* + * + */ +int +aiowaitgroup_wait(struct aiowaitgroup *wg, int timo) +{ + int error; + + error = cv_timedwait_sig(&wg->done_cv, &wg->mtx, timo); + if (error) { + if (error == EWOULDBLOCK) { + error = SET_ERROR(EAGAIN); + } + return error; + } + + return 0; +} + +/* + * + */ +void +aiowaitgrouplk_init(struct aiowaitgrouplk *lk) +{ + lk->n = 0; + lk->s = 2; + lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); +} + +/* + * + */ +void +aiowaitgrouplk_fini(struct aiowaitgrouplk *lk) +{ + if (!lk->s) { + return; + } + + kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); +} + +/* + * + */ +void +aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) +{ + printf("flushing %lx on %ld\n", (uintptr_t)lk, lk->n); + + for (int i = 0; i < lk->n; i++) { + struct aiowaitgroup *wg = lk->wgs[i]; + if (wg == NULL) { + continue; + } + + mutex_enter(&wg->mtx); + if (wg->active == false) { + lk->wgs[i] = NULL; + } + + KASSERT(wg->total > wg->completed); + wg->completed++; + if (!--wg->refcnt) { + mutex_exit(&wg->mtx); + aiowaitgroup_fini(wg); + } else { + mutex_exit(&wg->mtx); + cv_signal(&wg->done_cv); + } + } + + if (lk->n) { + kmem_free(lk->wgs, lk->s * sizeof(*lk->wgs)); + + lk->wgs = NULL; + lk->s = 0; + lk->n = 0; + } +} + +/* + * + */ +void +aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) +{ +// KASSERT(lk->n < lk->s); + if (lk->n == lk->s) { + size_t new_size = lk->s * lk->s; + + void **new_wgs = kmem_zalloc(new_size * + sizeof(*new_wgs), KM_SLEEP); + + memcpy(new_wgs, lk->wgs, lk->n * sizeof(*lk->wgs)); + kmem_free(lk->wgs, lk->s * sizeof(*lk->wgs)); + + lk->s = new_size; + lk->wgs = new_wgs; + } + lk->wgs[lk->n] = wg; + lk->n++; + wg->total++; + wg->refcnt++; +} diff --git a/sys/sys/aio.h b/sys/sys/aio.h index c8dad356fb56f..b1d6cefefdd48 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -91,8 +91,26 @@ struct aiocb { #define JOB_WIP 0x1 #define JOB_DONE 0x2 +/* Structure for tracking the status of a collection of OPS */ +struct aiowaitgroup { + kmutex_t mtx; /* Protects this structure */ + kcondvar_t done_cv; /* Signals when a job is complete */ + size_t completed; /* Keeps track of the number of completed jobs */ + size_t total; /* Keeps track of the number of 
total jobs */ + bool active; + int refcnt; +}; + +/* */ +struct aiowaitgrouplk { + void **wgs; /* Array of ops */ + size_t s; /* Size of ops array */ + size_t n; /* Total number of connected ops */ +}; + /* Structure of AIO job */ struct aiost; +struct buf; struct aio_job { kmutex_t mtx; /* Protects this structure */ int aio_op; /* Operation code */ @@ -101,9 +119,9 @@ struct aio_job { void *aiocb_uptr; /* User-space pointer for identification of job */ struct proc *p; /* Process that instantiated the job */ bool completed; /* Marks the completion status of this job */ - struct aiosp_ops **ops; /* Array of ops */ - size_t ops_size; /* Size of ops array */ - size_t ops_total; /* Total number of connected ops */ + struct aiowaitgrouplk lk; + struct buf **buf; + uint nbuf; TAILQ_ENTRY(aio_job) list; struct lio_req *lio; }; @@ -119,14 +137,6 @@ struct aio_job { #define AIOSP_SUSPEND_NMASK(N) ((N) & 0xffff) << 16) #define AIOSP_SUSPEND_NEXTRACT(FLAGS) (((FLAGS) >> 16) & 0xffff) -/* Structure for tracking the status of a collection of OPS */ -struct aiosp_ops { - kmutex_t mtx; /* Protects this structure */ - kcondvar_t done_cv; /* Signals when a job is complete */ - size_t completed; /* Keeps track of the number of completed jobs */ - size_t total; /* Keeps track of the number of total jobs */ -}; - struct aiost; struct aiost_file_group { RB_ENTRY(aiost_file_group) tree; @@ -219,6 +229,15 @@ int aiocbp_lookup(struct aiosp *, struct aiocbp **, void *); int aiocbp_remove(struct aiosp *, void *); int aiocbp_insert(struct aiosp *, struct aiocbp *); +void aiowaitgroup_init(struct aiowaitgroup *); +void aiowaitgroup_fini(struct aiowaitgroup *); +int aiowaitgroup_wait(struct aiowaitgroup *, int); +void aiowaitgroup_done(struct aiowaitgroup *); +void aiowaitgroup_join(struct aiowaitgroup *, struct aiowaitgrouplk *); +void aiowaitgrouplk_init(struct aiowaitgrouplk *); +void aiowaitgrouplk_fini(struct aiowaitgrouplk *); +void aiowaitgrouplk_flush(struct aiowaitgrouplk *); + #endif /* _KERNEL */ From 30daa2c227b212227fea26ab4a33d36542864684 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sat, 9 Aug 2025 00:23:11 -0600 Subject: [PATCH 34/53] aiowaitgroup lifecycle completion --- sys/kern/sys_aio.c | 38 ++++++----- sys/kern/sys_aiosp.c | 149 ++++++++++++++++++++++++++++++------------- sys/sys/aio.h | 17 +++-- 3 files changed, 132 insertions(+), 72 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index f8451814b8a6d..078c64ef9223d 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -200,8 +200,6 @@ aio_procinit(struct proc *p) } #endif - printf("doing this?\n"); - /* Initialize queue and their synchronization structures */ mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE); cv_init(&aio->aio_worker_cv, "aiowork"); @@ -336,8 +334,6 @@ aio_worker(void *arg) (void)copyout(&a_job->aiocbp, a_job->aiocb_uptr, sizeof(struct aiocb)); - printf("I am looking to read this timestamp!\n"); - mutex_enter(&aio->aio_mtx); KASSERT(aio->curjob == a_job); aio->curjob = NULL; @@ -795,24 +791,26 @@ sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap, } */ struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; - struct aiocb aiocbp; - int error; if (aio == NULL) return SET_ERROR(EINVAL); - error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); +#ifdef AIOSP + const void *uptr = SCARG(uap, aiocbp); + return aiosp_error(&aio->aiosp, uptr, retval); +#else + struct aiocb aiocbp; + int error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); if (error) return error; - 
printf("%d %d\n", aiocbp._state == JOB_NONE, aiocbp._state == JOB_DONE); - if (aiocbp._state == JOB_NONE) return SET_ERROR(EINVAL); *retval = aiocbp._errno; return 0; +#endif } int @@ -856,20 +854,25 @@ sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, } */ struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; - struct aiocb aiocbp; - int error; - if (aio == NULL) + if (aio == NULL) { return SET_ERROR(EINVAL); + } +#ifdef AIOSP + const void *uptr = SCARG(uap, aiocbp); + return aiosp_return(&aio->aiosp, uptr, retval); +#else + struct aiocb aiocbp; + int error; error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); - if (error) + if (error) { return error; + } - printf("inside kernel %d %d\n", aiocbp._errno == EINPROGRESS, aiocbp._state != JOB_DONE); - - if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE) + if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE) { return SET_ERROR(EINVAL); + } *retval = aiocbp._retval; @@ -880,6 +883,7 @@ sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb)); return error; +#endif } int @@ -916,7 +920,7 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, struct aioproc *aio = p->p_aio; KASSERT(aio); error = aiosp_suspend(&aio->aiosp, list, nent, SCARG(uap, timeout) ? - &ts : NULL, AIOSP_SUSPEND_ANY); + &ts : NULL, AIOSP_SUSPEND_ALL); #else error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL); #endif diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index 1baef56fbbea6..c67e1123d87bf 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -214,12 +214,13 @@ aiosp_distribute_jobs(struct aiosp *sp) */ int aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, - struct timespec *ts, uint32_t flags) + struct timespec *ts, int flags) { struct aio_job *job; int error = 0; int timo; size_t target = 0; + size_t monitor = 0; if (ts) { timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); @@ -235,16 +236,8 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, timo = 0; } - if (flags & AIOSP_SUSPEND_ANY) { - target = 1; - } else if (flags & AIOSP_SUSPEND_ALL) { - target = nent; - } else if (flags & AIOSP_SUSPEND_N) { - target = AIOSP_SUSPEND_NEXTRACT(flags); - } - struct aiowaitgroup *wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); - aiowaitgroup_init(wg); + aiowaitgroup_init(wg); /* * We want a hash table that tracks jobs, using uptr as a key. 
We use @@ -270,18 +263,28 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, } job = aiocbp->job; - KASSERT(job); + monitor++; mutex_enter(&job->mtx); if (job->completed) { wg->completed++; + wg->total++; } else { - printf("attaching to job %lx\n", (uintptr_t)&job->lk); aiowaitgroup_join(wg, &job->lk); } mutex_exit(&job->mtx); } + if (!monitor) { + goto done; + } + + if (flags & AIOSP_SUSPEND_ANY) { + target = 1; + } else if (flags & AIOSP_SUSPEND_ALL) { + target = monitor; + } + for (; wg->completed < target;) { error = aiowaitgroup_wait(wg, timo); if (error) { @@ -290,9 +293,15 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, } done: - mutex_exit(&wg->mtx); - wg->refcnt--; wg->active = false; + wg->refcnt--; + + if (wg->refcnt == 0) { + mutex_exit(&wg->mtx); + aiowaitgroup_fini(wg); + } else { + mutex_exit(&wg->mtx); + } return error; } @@ -457,9 +466,10 @@ aiost_process_singleton (struct aiost *st) mutex_enter(&job->mtx); job->completed = true; - aiowaitgrouplk_flush(&job->lk); mutex_exit(&job->mtx); + aiowaitgrouplk_flush(&job->lk); + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); } @@ -487,9 +497,10 @@ aiost_process_fg (struct aiost *st) mutex_enter(&job->mtx); job->completed = true; - aiowaitgrouplk_flush(&job->lk); mutex_exit(&job->mtx); + aiowaitgrouplk_flush(&job->lk); + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); } @@ -563,8 +574,6 @@ aiost_entry(void *arg) aiost_process_singleton(st); } - printf("finished!!!\n"); - st->state = AIOST_STATE_NONE; st->job = NULL; st->fg = NULL; @@ -719,9 +728,6 @@ io_write_fallback(struct aio_job *job) job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; - copyout(&job->aiocbp, job->aiocb_uptr, - sizeof(struct aiocb)); - return 0; } @@ -768,9 +774,6 @@ io_read_fallback(struct aio_job *job) job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; - copyout(&job->aiocbp, job->aiocb_uptr, - sizeof(struct aiocb)); - return 0; } @@ -856,7 +859,7 @@ aiost_terminate(struct aiost *st) * Ensure that the same job can not be enqueued twice. 
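 * For example, a second submission that reuses a userspace aiocb while the
 * first operation on it is still in flight matches on aiocb_uptr in the scan
 * below and is rejected with EINVAL, instead of silently corrupting the
 * in-flight job's hash entry.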
*/ int -aiosp_validate_conflicts(struct aiosp *aiosp, void *uptr) +aiosp_validate_conflicts(struct aiosp *aiosp, const void *uptr) { struct aiost *st; @@ -877,11 +880,63 @@ aiosp_validate_conflicts(struct aiosp *aiosp, void *uptr) return 0; } +/* + * + */ +int aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval) +{ + struct aiocbp *aiocbp = NULL; + struct aio_job *job; + int error; + + error = aiocbp_lookup(aiosp, &aiocbp, uptr); + if (error) { + return error; + } + + job = aiocbp->job; + if (job->aiocbp._state == JOB_NONE) { + return SET_ERROR(EINVAL); + } + + *retval = job->aiocbp._errno; + + return error; +} + +/* + * + */ +int aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) +{ + struct aiocbp *aiocbp = NULL; + struct aio_job *job; + int error; + + error = aiocbp_lookup(aiosp, &aiocbp, uptr); + if (error) { + return error; + } + job = aiocbp->job; + + if (job->aiocbp._errno == EINPROGRESS || job->aiocbp._state != JOB_DONE) { + return SET_ERROR(EINVAL); + } + + *retval = job->aiocbp._retval; + + job->aiocbp._errno = 0; + job->aiocbp._retval = -1; + job->aiocbp._state = JOB_NONE; + + return 0; +} + /* * aiocbp hash function */ static inline u_int -aiocbp_hash(void *uptr) +aiocbp_hash(const void *uptr) { return hash32_buf(&uptr, sizeof(uptr), HASH32_BUF_INIT); } @@ -890,7 +945,7 @@ aiocbp_hash(void *uptr) * aiocbp hash lookup */ int -aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, void *uptr) +aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) { struct aiocbp *aiocbp; u_int hash; @@ -918,7 +973,7 @@ aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, void *uptr) * aiocbp hash removal */ int -aiocbp_remove(struct aiosp *aiosp, void *uptr) +aiocbp_remove(struct aiosp *aiosp, const void *uptr) { struct aiocbp *aiocbp; u_int hash; @@ -946,7 +1001,7 @@ int aiocbp_insert(struct aiosp *aiosp, struct aiocbp *aiocbp) { struct aiocbp *found; - void *uptr; + const void *uptr; u_int hash; uptr = aiocbp->uptr; @@ -1073,6 +1128,7 @@ aiowaitgroup_wait(struct aiowaitgroup *wg, int timo) void aiowaitgrouplk_init(struct aiowaitgrouplk *lk) { + mutex_init(&lk->mtx, MUTEX_DEFAULT, IPL_NONE); lk->n = 0; lk->s = 2; lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); @@ -1084,21 +1140,19 @@ aiowaitgrouplk_init(struct aiowaitgrouplk *lk) void aiowaitgrouplk_fini(struct aiowaitgrouplk *lk) { - if (!lk->s) { - return; - } + mutex_destroy(&lk->mtx); - kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); + if (lk->s) { + kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); + } } -/* - * - */ void aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) { printf("flushing %lx on %ld\n", (uintptr_t)lk, lk->n); + mutex_enter(&lk->mtx); for (int i = 0; i < lk->n; i++) { struct aiowaitgroup *wg = lk->wgs[i]; if (wg == NULL) { @@ -1106,28 +1160,29 @@ aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) } mutex_enter(&wg->mtx); - if (wg->active == false) { - lk->wgs[i] = NULL; + + if (wg->active) { + wg->completed++; + cv_signal(&wg->done_cv); } - KASSERT(wg->total > wg->completed); - wg->completed++; - if (!--wg->refcnt) { + if (--wg->refcnt == 0) { mutex_exit(&wg->mtx); aiowaitgroup_fini(wg); } else { mutex_exit(&wg->mtx); - cv_signal(&wg->done_cv); } } if (lk->n) { - kmem_free(lk->wgs, lk->s * sizeof(*lk->wgs)); + kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); - lk->wgs = NULL; - lk->s = 0; lk->n = 0; + lk->s = 2; + lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); } + + mutex_exit(&lk->mtx); } /* @@ -1136,7 +1191,8 @@ aiowaitgrouplk_flush(struct 
aiowaitgrouplk *lk) void aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) { -// KASSERT(lk->n < lk->s); + KASSERT(lk->n < lk->s); + mutex_enter(&lk->mtx); if (lk->n == lk->s) { size_t new_size = lk->s * lk->s; @@ -1153,4 +1209,5 @@ aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) lk->n++; wg->total++; wg->refcnt++; + mutex_exit(&lk->mtx); } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index b1d6cefefdd48..684d8984a1640 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -103,6 +103,7 @@ struct aiowaitgroup { /* */ struct aiowaitgrouplk { + kmutex_t mtx; /* */ void **wgs; /* Array of ops */ size_t s; /* Size of ops array */ size_t n; /* Total number of connected ops */ @@ -132,10 +133,6 @@ struct aio_job { #define AIOSP_SUSPEND_ANY 0x1 #define AIOSP_SUSPEND_ALL 0x2 -#define AIOSP_SUSPEND_N 0x4 - -#define AIOSP_SUSPEND_NMASK(N) ((N) & 0xffff) << 16) -#define AIOSP_SUSPEND_NEXTRACT(FLAGS) (((FLAGS) >> 16) & 0xffff) struct aiost; struct aiost_file_group { @@ -163,7 +160,7 @@ struct aiost { struct aiocbp { TAILQ_ENTRY(aiocbp) list; - void *uptr; + const void *uptr; struct aio_job *job; }; @@ -219,14 +216,16 @@ int aiosp_destroy(struct aiosp *); int aiosp_distribute_jobs(struct aiosp *); int aiosp_enqueue_job(struct aiosp *, struct aio_job *); int aiosp_suspend(struct aiosp *, struct aiocb **, int, struct timespec *, - uint32_t); + int); int aiosp_flush(struct aiosp *); -int aiosp_validate_conflicts(struct aiosp *, void *); +int aiosp_validate_conflicts(struct aiosp *, const void *); +int aiosp_error (struct aiosp *, const void *, register_t *); +int aiosp_return (struct aiosp *, const void *, register_t *); void aiocbp_destroy(struct aiosp *); int aiocbp_init(struct aiosp *, u_int); -int aiocbp_lookup(struct aiosp *, struct aiocbp **, void *); -int aiocbp_remove(struct aiosp *, void *); +int aiocbp_lookup(struct aiosp *, struct aiocbp **, const void *); +int aiocbp_remove(struct aiosp *, const void *); int aiocbp_insert(struct aiosp *, struct aiocbp *); void aiowaitgroup_init(struct aiowaitgroup *); From caffe3babf78608031de8ac9b431e521624a75a3 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Wed, 13 Aug 2025 20:04:17 -0600 Subject: [PATCH 35/53] extensive comments and polishing --- sys/kern/sys_aio.c | 11 ++- sys/kern/sys_aiosp.c | 186 ++++++++++++++++++++++++++----------------- sys/sys/aio.h | 48 +++++------ 3 files changed, 145 insertions(+), 100 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 078c64ef9223d..2e60604a83bbd 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -801,14 +801,19 @@ sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap, #else struct aiocb aiocbp; int error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); - if (error) + if (error) { + printf("WHAT!\n"); return error; + } - if (aiocbp._state == JOB_NONE) + if (aiocbp._state == JOB_NONE) { + printf("WHA!T!\n"); return SET_ERROR(EINVAL); + } *retval = aiocbp._errno; + printf("WHY!!\n"); return 0; #endif } @@ -869,6 +874,8 @@ sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, if (error) { return error; } + + printf("WHAT!\n"); if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE) { return SET_ERROR(EINVAL); diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c index c67e1123d87bf..3b1c67fc862b2 100644 --- a/sys/kern/sys_aiosp.c +++ b/sys/kern/sys_aiosp.c @@ -27,7 +27,62 @@ */ /* - * Implementation of service pools to support asynchronous I/O + * NetBSD asynchronous I/O service pool implementation + * 
+ * Design overview
+ *
+ * Thread pool architecture:
+ * Each process maintains an aiosp (service pool) with worker threads (aiost)
+ * Workers are recycled via freelist/active lists to minimize thread creation
+ * Workers sleep on service_cv until jobs are assigned
+ * On process termination, all associated service threads are terminated
+ *
+ * Job distribution strategy:
+ * Jobs are initially queued to aiosp->jobs pending distribution
+ * Regular files: jobs are grouped by file descriptor for potential coalescing
+ * Multiple jobs on the same fp are assigned to one thread via aiost_file_group
+ * Enables future optimizations like request merging and vectored I/O
+ * Non-regular files: each job gets a dedicated worker (no coalescing)
+ * Distribution occurs when aiosp_distribute_jobs() is called
+ *
+ * Job tracking:
+ * Hash table (aiocbp_hash) maps userspace aiocb pointers to kernel jobs
+ * Prevents duplicate submissions of the same aiocb
+ * Enables O(1) lookup for aio_error/aio_return/aio_suspend operations
+ * Hash collision resolution via chaining (TAILQ per bucket)
+ *
+ * Completion notification:
+ * Two-phase notification: waitgroup signaling, then signal delivery
+ * An aiowaitgrouplk attached to each job tracks all waiting suspend operations
+ * On completion, all registered waitgroups are notified atomically
+ * Supports both "any" (wake on first completion) and "all" (wake when all
+ * done) modes
+ * Waitgroups are reference-counted to handle concurrent completion/registration
+ *
+ * Thread lifecycle:
+ * Threads handle both singleton jobs and file-grouped batches
+ * After processing, threads return to the freelist for reuse
+ * Thread termination uses a state machine (none -> operation -> terminate)
+ * Abrupt process termination is handled via signal checks in cv_wait_sig()
+ *
+ * Synchronization model:
+ * Hierarchical locking: aiosp->mtx > aiost->mtx > job->mtx
+ * aiosp->mtx: protects job queues, thread lists, and the file group tree
+ * aiost->mtx: protects thread state transitions
+ * job->mtx: protects the completion flag only
+ * aiowaitgrouplk->mtx: protects waitgroup array modifications
+ *
+ * File group management:
+ * RB tree (aiost_file_tree) maintains active file groups
+ * Groups are created on demand when regular-file jobs are distributed
+ * Groups are destroyed when all jobs for that fp complete
+ * Enables future enhancements like dynamic job appending during processing
+ *
+ * Implementation notes:
+ *
+ * io_read/write currently use fallback implementations
+ * Buffer array (job->buf) is reserved for future vectored I/O support
+ * File groups could be extended for list I/O (lio_listio) kernel-side batching
+ * Range-locking infrastructure is planned but not yet implemented
  */
 
 #include
@@ -68,7 +123,7 @@ static int aiosp_worker_extract(struct aiosp *, struct aiost **);
 
 static int io_write(struct aiost *, struct aio_job *);
 static int io_read(struct aiost *, struct aio_job *);
 static int io_sync(struct aiost *);
-static int io_construct(struct aio_job *, struct file **,
+static int uio_construct(struct aio_job *, struct file **,
 	struct iovec *, struct uio *);
 static int io_write_fallback(struct aio_job *);
 static int io_read_fallback(struct aio_job *);
@@ -110,8 +165,9 @@ RB_PROTOTYPE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp);
 RB_GENERATE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp);
 
 /*
- * Group jobs by file handle for coalescing and distribute them among service
- * threads
+ * Group jobs by file descriptor and distribute to service threads.
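+ * (For example, a submission path might do, in order:
+ *
+ *	aiosp_enqueue_job(sp, job);	(append to sp->jobs)
+ *	aiosp_distribute_jobs(sp);	(group by fp, wake workers)
+ *
+ * with the enqueue side never touching worker threads directly.)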
+ * Regular files are coalesced per-fp, others get individual threads. + * Must be called with jobs queued in sp->jobs */ int aiosp_distribute_jobs(struct aiosp *sp) @@ -193,9 +249,10 @@ aiosp_distribute_jobs(struct aiosp *sp) fg->queue_size++; } + mutex_enter(&aiost->mtx); aiost->freelist = false; aiost->state = AIOST_STATE_OPERATION; - + mutex_exit(&aiost->mtx); cv_signal(&aiost->service_cv); } @@ -205,12 +262,12 @@ aiosp_distribute_jobs(struct aiosp *sp) } /* - * aiosp_ops represent a collection of operations whose status should be - * tracked. When the user invokes a suspend, we create a new collection, and - * then for each aiost referenced within aiocbp_list, when those operations - * are finished, every aiosp_ops appended to that thread (aiost->ops) gets - * awoken and the completion count incremented. The completion counter can be - * incremeneted posthumously as well. + * Wait for specified AIO operations to complete + * Create a waitgroup to monitor the specified aiocb list. + * Returns when timeout expires or completion criteria met + * + * AIOSP_SUSPEND_ANY return when any job completes + * AIOSP_SUSPEND_ALL return when all jobs complete */ int aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, @@ -239,14 +296,6 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, struct aiowaitgroup *wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); aiowaitgroup_init(wg); - /* - * We want a hash table that tracks jobs, using uptr as a key. We use - * this to track job completion status. How do we handle the case where - * a job is completed with one aiost, then completed, then another job - * enqueued and assigned to that exact aiost. This makes it such that - * both aiosts are assigned to both threads. - */ - mutex_enter(&wg->mtx); for (int i = 0; i < nent; i++) { if (aiocbp_list[i] == NULL) { @@ -323,7 +372,7 @@ aiosp_initialize(struct aiosp *sp) } /* - * + * Extract an available worker thread from pool or create new one */ static int aiosp_worker_extract(struct aiosp *sp, struct aiost **aiost) @@ -445,7 +494,7 @@ aiost_create(struct aiosp *sp, struct aiost **ret) } /* - * + * Process single job without coalescing. */ static void aiost_process_singleton (struct aiost *st) @@ -465,16 +514,15 @@ aiost_process_singleton (struct aiost *st) } mutex_enter(&job->mtx); + aiowaitgrouplk_flush(&job->lk); job->completed = true; mutex_exit(&job->mtx); - aiowaitgrouplk_flush(&job->lk); - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); } /* - * + * Process all jobs in a file group. */ static void aiost_process_fg (struct aiost *st) @@ -512,9 +560,8 @@ aiost_process_fg (struct aiost *st) } /* - * Servicing thread entry point. Process the operation. Notify all those - * blocking on the completion of the operation. Send a signal if necessary. And - * then mark the current servicing thread as free. + * Service thread entry point. Processes assigned jobs until termination. + * Handles both singleton jobs and file-grouped job batches. */ static void aiost_entry(void *arg) @@ -553,25 +600,14 @@ aiost_entry(void *arg) st->state); } - // A MORE LOGICAL SOLUTION FILE GROUPS ARE JUST LISTIO INSIDE - // THE KERNEL (OR CAN THEY NOT BE??? HOW ABOUT ADD EXTRA - // FUNCTIONALITY TO THEM LIKE BEING ABLE TO DYNAMICALLY APPEND - // NEW OPS WHILE EVERYTHING IS IN THE MIDDLE OF BEING - // PROCESSED? NO IT IS NOT. IT IS ABOUT COMBINING OBJECTS THAT - // HAVE TO BLOCK VERSUS OBJECTS THAT DO. 
ALSO COMBINE AIOSP AND - // AIO TOGETHER THEIR SEPARATENESS IS GETTING ON MY NERVES - // STRIP AWAY USELESS MUMBO AI JUMBO AND MAKE WORK I SHOULD BE - // ABLE TO ACHIEVE CONCURRENCY ACROSS MULTIPLE FILES - // SIMPLIFY AND STREAMLINE DESIGN AND DOCUMENT WHEN NECESSARY - // SEND AN EMAIL OFF TO JASON AND CHISTOS - // IMPLEMENT RANGE LOCKS #1 - - //printf("%d %d\n", mutex_owned(&sp->mtx), mutex_owned(&st->mtx)); - if (st->fg) { + mutex_exit(&st->mtx); aiost_process_fg(st); + mutex_enter(&st->mtx); } else { + mutex_exit(&st->mtx); aiost_process_singleton(st); + mutex_enter(&st->mtx); } st->state = AIOST_STATE_NONE; @@ -583,7 +619,9 @@ aiost_entry(void *arg) * freelist, dance around locks, then iterate loop and block on * st->service_cv */ + mutex_exit(&st->mtx); mutex_enter(&sp->mtx); + mutex_enter(&st->mtx); st->freelist = true; @@ -596,6 +634,7 @@ aiost_entry(void *arg) mutex_exit(&sp->mtx); } + mutex_exit(&st->mtx); mutex_enter(&sp->mtx); if (st->freelist) { @@ -605,11 +644,9 @@ aiost_entry(void *arg) TAILQ_REMOVE(&sp->active, st, list); sp->nthreads_active--; } - sp->nthreads_total--; mutex_exit(&sp->mtx); - mutex_exit(&st->mtx); kthread_exit(0); } @@ -635,7 +672,7 @@ aiost_sigsend(struct proc *p, struct sigevent *sig) } /* - * + * Process write operation for non-blocking jobs. */ static int io_write(struct aiost *aiost, struct aio_job *job) @@ -644,7 +681,7 @@ io_write(struct aiost *aiost, struct aio_job *job) } /* - * + * Process read operation for non-blocking jobs. */ static int io_read(struct aiost *aiost, struct aio_job *job) @@ -653,10 +690,10 @@ io_read(struct aiost *aiost, struct aio_job *job) } /* - * + * Initialize UIO structure for I/O operation. */ static int -io_construct(struct aio_job *job, struct file **fp, struct iovec *aiov, +uio_construct(struct aio_job *job, struct file **fp, struct iovec *aiov, struct uio *auio) { struct aiocb *aiocbp = &job->aiocbp; @@ -686,7 +723,7 @@ io_construct(struct aio_job *job, struct file **fp, struct iovec *aiov, } /* - * + * Perform synchronous write via file operations. */ static int io_write_fallback(struct aio_job *job) @@ -697,8 +734,12 @@ io_write_fallback(struct aio_job *job) struct aiocb *aiocbp; int error; - error = io_construct(job, &fp, &aiov, &auio); + error = uio_construct(job, &fp, &aiov, &auio); if (error) { + if (fp) { + closef(fp); + } + goto done; } @@ -732,7 +773,7 @@ io_write_fallback(struct aio_job *job) } /* - * + * Perform synchronous read via file operations. */ static int io_read_fallback(struct aio_job *job) @@ -743,8 +784,11 @@ io_read_fallback(struct aio_job *job) struct aiocb *aiocbp; int error; - error = io_construct(job, &fp, &aiov, &auio); + error = uio_construct(job, &fp, &aiov, &auio); if (error) { + if (fp) { + closef(fp); + } goto done; } @@ -778,7 +822,7 @@ io_read_fallback(struct aio_job *job) } /* - * process sync/dsync + * Flush file data to stable storage. */ static int io_sync(struct aiost *aiost) @@ -933,7 +977,7 @@ int aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) } /* - * aiocbp hash function + * Hash function for aiocb user pointers. */ static inline u_int aiocbp_hash(const void *uptr) @@ -942,7 +986,7 @@ aiocbp_hash(const void *uptr) } /* - * aiocbp hash lookup + * Find aiocb entry by user pointer. 
*/ int aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) @@ -952,13 +996,9 @@ aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; - //printf("searching element with key {%lx} and hash {%x}\n", (uintptr_t)uptr, hash); - mutex_enter(&aiosp->aio_hash_mtx); TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { if (aiocbp->uptr == uptr) { - //printf("element found {%lx} and the job {%lx} {%lx}\n", (uintptr_t)aiocbp, (uintptr_t)aiocbp->job, (uintptr_t)aiocbp->job->aiost); - *aiocbpp = aiocbp; mutex_exit(&aiosp->aio_hash_mtx); return 0; @@ -970,7 +1010,7 @@ aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) } /* - * aiocbp hash removal + * Remove aiocb entry from hash table. */ int aiocbp_remove(struct aiosp *aiosp, const void *uptr) @@ -995,7 +1035,7 @@ aiocbp_remove(struct aiosp *aiosp, const void *uptr) } /* - * aiocbp hash insertion + * Insert aiocb entry into hash table. */ int aiocbp_insert(struct aiosp *aiosp, struct aiocbp *aiocbp) @@ -1016,8 +1056,6 @@ aiocbp_insert(struct aiosp *aiosp, struct aiocbp *aiocbp) } } - //printf("appending element with key {%x} onto hash {%lx} aiocbp {%lx}\n", hash, (uintptr_t)uptr, (uintptr_t)aiocbp); - TAILQ_INSERT_HEAD(&aiosp->aio_hash[hash], aiocbp, list); mutex_exit(&aiosp->aio_hash_mtx); @@ -1025,7 +1063,7 @@ aiocbp_insert(struct aiosp *aiosp, struct aiocbp *aiocbp) } /* - * aiocbp initialise + * Initialize aiocb hash table. */ int aiocbp_init(struct aiosp *aiosp, u_int hashsize) @@ -1050,7 +1088,7 @@ aiocbp_init(struct aiosp *aiosp, u_int hashsize) } /* - * aiocbp destroy + * Destroy aiocb hash table and free entries. */ void aiocbp_destroy(struct aiosp *aiosp) @@ -1079,7 +1117,7 @@ aiocbp_destroy(struct aiosp *aiosp) } /* - * + * Initialize wait group for suspend operations. */ void aiowaitgroup_init(struct aiowaitgroup *wg) @@ -1093,7 +1131,7 @@ aiowaitgroup_init(struct aiowaitgroup *wg) } /* - * + * Clean up wait group resources. */ void aiowaitgroup_fini(struct aiowaitgroup *wg) @@ -1104,7 +1142,7 @@ aiowaitgroup_fini(struct aiowaitgroup *wg) } /* - * + * Block until wait group signals completion. */ int aiowaitgroup_wait(struct aiowaitgroup *wg, int timo) @@ -1123,7 +1161,7 @@ aiowaitgroup_wait(struct aiowaitgroup *wg, int timo) } /* - * + * Initialize wait group link for job tracking. */ void aiowaitgrouplk_init(struct aiowaitgrouplk *lk) @@ -1135,7 +1173,7 @@ aiowaitgrouplk_init(struct aiowaitgrouplk *lk) } /* - * + * Clean up wait group link resources. */ void aiowaitgrouplk_fini(struct aiowaitgrouplk *lk) @@ -1147,11 +1185,12 @@ aiowaitgrouplk_fini(struct aiowaitgrouplk *lk) } } +/* + * Notify all wait groups of job completion. + */ void aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) { - printf("flushing %lx on %ld\n", (uintptr_t)lk, lk->n); - mutex_enter(&lk->mtx); for (int i = 0; i < lk->n; i++) { struct aiowaitgroup *wg = lk->wgs[i]; @@ -1186,12 +1225,11 @@ aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) } /* - * + * Attach wait group to jobs notification list. */ void aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) { - KASSERT(lk->n < lk->s); mutex_enter(&lk->mtx); if (lk->n == lk->s) { size_t new_size = lk->s * lk->s; diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 684d8984a1640..baa1941741868 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -8,10 +8,10 @@ * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -93,38 +93,38 @@ struct aiocb { /* Structure for tracking the status of a collection of OPS */ struct aiowaitgroup { - kmutex_t mtx; /* Protects this structure */ - kcondvar_t done_cv; /* Signals when a job is complete */ - size_t completed; /* Keeps track of the number of completed jobs */ - size_t total; /* Keeps track of the number of total jobs */ - bool active; - int refcnt; + kmutex_t mtx; /* Protects entire structure */ + kcondvar_t done_cv; /* Signaled when job completes */ + size_t completed; /* Number of completed jobs in this wait group */ + size_t total; /* Total jobs being waited on */ + bool active; /* False after suspend returns/times out */ + int refcnt; /* Reference count */ }; /* */ struct aiowaitgrouplk { - kmutex_t mtx; /* */ - void **wgs; /* Array of ops */ - size_t s; /* Size of ops array */ - size_t n; /* Total number of connected ops */ + kmutex_t mtx; /* Protects wgs array modifications */ + void **wgs; /* Dynamic array of waiting aiowaitgroups */ + size_t s; /* Allocated size of wgs array */ + size_t n; /* Current number of waitgroups */ }; /* Structure of AIO job */ struct aiost; struct buf; struct aio_job { - kmutex_t mtx; /* Protects this structure */ - int aio_op; /* Operation code */ - struct aiocb aiocbp; /* AIO data structure */ - pri_t pri; /* Job priority */ - void *aiocb_uptr; /* User-space pointer for identification of job */ - struct proc *p; /* Process that instantiated the job */ - bool completed; /* Marks the completion status of this job */ - struct aiowaitgrouplk lk; - struct buf **buf; - uint nbuf; + kmutex_t mtx; /* Protects completed flag */ + int aio_op; /* Operation type (AIO_READ/WRITE/SYNC) */ + struct aiocb aiocbp; /* User-visible AIO control block */ + pri_t pri; /* Scheduling priority */ + void *aiocb_uptr; /* User pointer for job identification */ + struct proc *p; /* Originating process */ + bool completed; /* Job completion status */ + struct aiowaitgrouplk lk; /* List of waitgroups waiting on this job */ + struct buf **buf; /* Buffer array for vectored I/O (unused?) */ + uint nbuf; /* Number of buffers (unused?) 
*/
 TAILQ_ENTRY(aio_job) list;
- struct lio_req *lio;
+ struct lio_req *lio; /* List I/O request (if part of lio_listio) */
 };
 
 #define AIOST_STATE_NONE 0x1

From b273141a9597c0c47918a120426e939af5699865 Mon Sep 17 00:00:00 2001
From: ethan4984
Date: Fri, 15 Aug 2025 17:42:26 -0600
Subject: [PATCH 36/53] merge the legacy and aiosp implementations

---
 sys/kern/files.kern  |    1 -
 sys/kern/sys_aio.c   | 1748 ++++++++++++++++++++++++++++--------------
 sys/kern/sys_aiosp.c | 1251 ------------------------------
 sys/sys/aio.h        |    6 -
 4 files changed, 1181 insertions(+), 1825 deletions(-)
 delete mode 100644 sys/kern/sys_aiosp.c

diff --git a/sys/kern/files.kern b/sys/kern/files.kern
index ec5c2b578ceb0..4c8967d61dcd6 100644
--- a/sys/kern/files.kern
+++ b/sys/kern/files.kern
@@ -161,7 +161,6 @@ file kern/subr_vmem.c kern
 file kern/subr_workqueue.c kern
 file kern/subr_xcall.c kern
 file kern/sys_aio.c aio
-file kern/sys_aiosp.c aiosp
 file kern/sys_descrip.c kern
 file kern/sys_epoll.c kern
 file kern/sys_eventfd.c kern
diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c
index 2e60604a83bbd..9bfb3a1afb5ed 100644
--- a/sys/kern/sys_aio.c
+++ b/sys/kern/sys_aio.c
@@ -1,17 +1,17 @@
-/* $NetBSD: sys_aio.c,v 1.50 2024/12/07 02:38:51 riastradh Exp $ */
+/* $NetBSD: sys_aio.c,v 0.00 2025/08/15 12:00:00 ethan4984 Exp $ */
 
 /*
- * Copyright (c) 2007 Mindaugas Rasiukevicius
+ * Copyright (c) 2025 The NetBSD Foundation, Inc.
  * All rights reserved.
 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
+ *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -27,20 +27,56 @@
 */
 
 /*
- * Implementation of POSIX asynchronous I/O.
- * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
+ * NetBSD asynchronous I/O service pool implementation
+ *
+ * Design overview
+ *
+ * Thread pool architecture:
+ * Each process owns an aiosp (service pool) with worker threads (aiost).
+ * Workers are reused via freelist/active lists to avoid churn.
+ * Workers sleep on service_cv until a job is assigned.
+ * On process teardown, outstanding work is quiesced and threads are destroyed.
+ *
+ * Job distribution:
+ * Jobs are appended to aiosp->jobs and then distributed to worker threads.
+ * Regular files: Jobs are grouped together by file handle to allow for future
+ * optimisation.
+ * Non-regular files: No grouping. Each job is handled directly by a discrete
+ * worker thread.
+ * Only regular files are candidates for non-blocking operation; however, the
+ * non-blocking path is not implemented yet. Everything currently falls back to
+ * blocking I/O.
+ * Distribution is triggered by aiosp_distribute_jobs().
+ *
+ * Job tracking:
+ * A hash table (by userspace aiocb pointer) maps aiocb -> kernel job.
+ * This gives near-O(1) lookup for aio_error/aio_return/aio_suspend.
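+ * For example (hypothetical addresses): aio_read(&cb) hashes the user
+ * pointer &cb into a bucket, and a later aio_error(&cb) walks that
+ * bucket's chain to find the kernel-side job.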
+ * Resubmission of the same aiocb updates the mapping, allowing userspace to
+ * reuse aiocb storage liberally.
+ *
+ * File group management:
+ * RB tree (aiost_file_tree) maintains active file groups.
+ * Groups are created on demand when regular file jobs are distributed.
+ * Groups are destroyed when all jobs for that fp complete.
+ * Enables future enhancements like dynamic job appending during processing.
+ *
+ * Implementation notes
+ *
+ * io_read/io_write currently use fallback implementations.
 */
 
 #include
__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.50 2024/12/07 02:38:51 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
-#include "opt_aiosp.h"
 #include "opt_ddb.h"
 #endif
 
 #include
 #include
+#include
+#include
+#include
 
 #include
 #include
@@ -52,6 +88,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.50 2024/12/07 02:38:51 riastradh Exp $
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -81,15 +118,27 @@ static struct pool aio_job_pool;
 static struct pool aio_lio_pool;
 static void * aio_ehook;
 
-static void aio_worker(void *);
-static void aio_process(struct aio_job *);
-static void aio_sendsig(struct proc *, struct sigevent *);
 static int aio_enqueue_job(int, void *, struct lio_req *);
 static void aio_exit(proc_t *, void *);
 
 static int sysctl_aio_listio_max(SYSCTLFN_PROTO);
 static int sysctl_aio_max(SYSCTLFN_PROTO);
 
+/* Service pool functions */
+static int aiost_create(struct aiosp *, struct aiost **);
+static int aiost_terminate(struct aiost *);
+static void aiost_entry(void *);
+static void aiost_sigsend(struct proc *, struct sigevent *);
+static int aiosp_worker_extract(struct aiosp *, struct aiost **);
+
+static int io_write(struct aiost *, struct aio_job *);
+static int io_read(struct aiost *, struct aio_job *);
+static int io_sync(struct aiost *);
+static int uio_construct(struct aio_job *, struct file **,
+ struct iovec *, struct uio *);
+static int io_write_fallback(struct aio_job *);
+static int io_read_fallback(struct aio_job *);
+
 static const struct syscall_package aio_syscalls[] = {
 { SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel },
 { SYS_aio_error, 0, (sy_call_t *)sys_aio_error },
@@ -102,6 +151,26 @@ static const struct syscall_package aio_syscalls[] = {
 { 0, 0, NULL },
 };
 
+/*
+ * Order the RB tree with respect to fp.
+ */
+static int
+aiost_file_group_cmp(struct aiost_file_group *a, struct aiost_file_group *b)
+{
+ if (a == NULL || b == NULL) {
+ return (a == b) ? 0 : (a ? 1 : -1);
+ }
+
+ uintptr_t ap = (uintptr_t)a->fp;
+ uintptr_t bp = (uintptr_t)b->fp;
+
+ return (ap < bp) ? -1 : (ap > bp) ? 1 : 0;
+}
+
+RB_HEAD(aiost_file_tree, aiost_file_group);
+RB_PROTOTYPE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp);
+RB_GENERATE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp);
+
 /*
 * Tear down all AIO state.
*/ @@ -146,9 +215,9 @@ aio_init(void) int error; pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0, - "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE); + "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE); pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0, - "aio_lio_pool", &pool_allocator_nointr, IPL_NONE); + "aio_lio_pool", &pool_allocator_nointr, IPL_NONE); aio_ehook = exithook_establish(aio_exit, NULL); error = syscall_establish(NULL, aio_syscalls); @@ -180,65 +249,36 @@ static int aio_procinit(struct proc *p) { struct aioproc *aio; - struct lwp *l; int error; - vaddr_t uaddr; /* Allocate and initialize AIO structure */ aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP); - /* Initialize the aiocbp hash map */ -#ifdef AIOSP + /* Initialize the service pool */ error = aiosp_initialize(&aio->aiosp); if (error) { + kmem_free(aio, sizeof(struct aioproc)); return error; } error = aiocbp_init(&aio->aiosp, 256); if (error) { + aiosp_destroy(&aio->aiosp); + kmem_free(aio, sizeof(struct aioproc)); return error; } -#endif /* Initialize queue and their synchronization structures */ mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE); - cv_init(&aio->aio_worker_cv, "aiowork"); - cv_init(&aio->done_cv, "aiodone"); - TAILQ_INIT(&aio->jobs_queue); - - /* - * Create an AIO worker thread. - * XXX: Currently, AIO thread is not protected against user's actions. - */ - uaddr = uvm_uarea_alloc(); - if (uaddr == 0) { - aio_exit(p, aio); - return SET_ERROR(EAGAIN); - } - error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, aio_worker, - NULL, &l, curlwp->l_class, &curlwp->l_sigmask, &curlwp->l_sigstk); - if (error != 0) { - uvm_uarea_free(uaddr); - aio_exit(p, aio); - return error; - } /* Recheck if we are really first */ mutex_enter(p->p_lock); if (p->p_aio) { mutex_exit(p->p_lock); aio_exit(p, aio); - lwp_exit(l); return 0; } p->p_aio = aio; - - /* Complete the initialization of thread, and run it */ - aio->aio_worker = l; - lwp_lock(l); - lwp_changepri(l, MAXPRI_USER); - setrunnable(l); - /* LWP now unlocked */ mutex_exit(p->p_lock); return 0; @@ -250,7 +290,7 @@ aio_procinit(struct proc *p) static void aio_exit(struct proc *p, void *cookie) { - struct aio_job *a_job; + //struct aio_job *a_job; struct aioproc *aio; if (cookie != NULL) @@ -259,264 +299,1116 @@ aio_exit(struct proc *p, void *cookie) return; /* Free AIO queue */ - while (!TAILQ_EMPTY(&aio->jobs_queue)) { + // IMPLEMENT THIS BUT AIOSP + + /*while (!TAILQ_EMPTY(&aio->jobs_queue)) { a_job = TAILQ_FIRST(&aio->jobs_queue); TAILQ_REMOVE(&aio->jobs_queue, a_job, list); pool_put(&aio_job_pool, a_job); atomic_dec_uint(&aio_jobs_count); - } + }*/ /* Destroy and free the entire AIO data structure */ -#ifdef AIOSP aiocbp_destroy(&aio->aiosp); aiosp_destroy(&aio->aiosp); -#endif - cv_destroy(&aio->aio_worker_cv); - cv_destroy(&aio->done_cv); mutex_destroy(&aio->aio_mtx); kmem_free(aio, sizeof(struct aioproc)); } /* - * AIO worker thread and processor. + * Group jobs by file descriptor and distribute to service threads. + * Regular files are coalesced per-fp, others get individual threads. 
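+ * An illustrative sketch (hypothetical descriptors, not part of this
+ * change):
+ *
+ *	aio_read(&cb1);		fd 3, VREG: joins the file group for its fp
+ *	aio_read(&cb2);		fd 3, VREG: same group, same worker
+ *	aio_read(&cb3);		fd 4, socket: dedicated worker
+ *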
+ * Must be called with jobs queued in sp->jobs + */ +int +aiosp_distribute_jobs(struct aiosp *sp) +{ + struct aio_job *job; + struct file *fp; + int error = 0; + + mutex_enter(&sp->mtx); + if (!sp->jobs_pending) { + mutex_exit(&sp->mtx); + return 0; + } + + struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { + fp = fd_getfile2(job->p, job->aiocbp.aio_fildes); + if (fp == NULL) { + mutex_exit(&sp->mtx); + error = SET_ERROR(EBADF); + return error; + } + + struct aiost_file_group *fg = NULL; + struct aiost *aiost = NULL; + + if (fp->f_vnode && fp->f_vnode->v_type == VREG) { + struct aiost_file_group find = { 0 }; + find.fp = fp; + fg = RB_FIND(aiost_file_tree, sp->fg_root, &find); + + if (fg == NULL) { + fg = kmem_zalloc(sizeof(*fg), KM_SLEEP); + fg->fp = fp; + fg->vp = fp->f_vnode; + fg->queue_size = 0; + TAILQ_INIT(&fg->queue); + + error = aiosp_worker_extract(sp, &aiost); + if (error) { + kmem_free(fg, sizeof(*fg)); + closef(fp); + mutex_exit(&sp->mtx); + return error; + } + + RB_INSERT(aiost_file_tree, sp->fg_root, fg); + fg->aiost = aiost; + + aiost->fg = fg; + aiost->job = NULL; + } else { + /* + * release fp as it already exists within fg + */ + closef(fp); + aiost = fg->aiost; + } + } else { + error = aiosp_worker_extract(sp, &aiost); + if (error) { + closef(fp); + mutex_exit(&sp->mtx); + return error; + } + + aiost->fg = NULL; + aiost->job = job; + } + + /* + * Move from sp->jobs to fg->jobs + */ + TAILQ_REMOVE(&sp->jobs, job, list); + sp->jobs_pending--; + + if (fg) { + TAILQ_INSERT_TAIL(&fg->queue, job, list); + fg->queue_size++; + } + + mutex_enter(&aiost->mtx); + aiost->freelist = false; + aiost->state = AIOST_STATE_OPERATION; + mutex_exit(&aiost->mtx); + cv_signal(&aiost->service_cv); + } + + mutex_exit(&sp->mtx); + + return error; +} + +/* + * Wait for specified AIO operations to complete + * Create a waitgroup to monitor the specified aiocb list. 
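+ * Jobs that have already completed are counted immediately; the waitgroup
+ * is joined to the aiowaitgrouplk of every job still in flight.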
+ * Returns when the timeout expires or the completion criteria are met.
+ *
+ * AIOSP_SUSPEND_ANY: return when any job completes
+ * AIOSP_SUSPEND_ALL: return when all jobs complete
+ */
+int
+aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent,
+ struct timespec *ts, int flags)
+{
+ struct aio_job *job;
+ int error = 0;
+ int timo;
+ size_t target = 0;
+ size_t monitor = 0;
+
+ if (ts) {
+ timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000));
+ if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0) {
+ timo = 1;
+ }
+
+ if (timo <= 0) {
+ error = SET_ERROR(EAGAIN);
+ return error;
+ }
+ } else {
+ timo = 0;
+ }
+
+ struct aiowaitgroup *wg = kmem_zalloc(sizeof(*wg), KM_SLEEP);
+ aiowaitgroup_init(wg);
+
+ mutex_enter(&wg->mtx);
+ for (int i = 0; i < nent; i++) {
+ if (aiocbp_list[i] == NULL) {
+ continue;
+ }
+
+ struct aiocbp *aiocbp = NULL;
+ error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_list[i]);
+ if (error) {
+ goto done;
+ }
+ if (aiocbp == NULL) {
+ continue;
+ }
+
+ job = aiocbp->job;
+ monitor++;
+
+ mutex_enter(&job->mtx);
+ if (job->completed) {
+ wg->completed++;
+ wg->total++;
+ } else {
+ aiowaitgroup_join(wg, &job->lk);
+ }
+ mutex_exit(&job->mtx);
+ }
+
+ if (!monitor) {
+ goto done;
+ }
+
+ if (flags & AIOSP_SUSPEND_ANY) {
+ target = 1;
+ } else if (flags & AIOSP_SUSPEND_ALL) {
+ target = monitor;
+ }
+
+ for (; wg->completed < target;) {
+ error = aiowaitgroup_wait(wg, timo);
+ if (error) {
+ goto done;
+ }
+ }
+
+done:
+ wg->active = false;
+ wg->refcnt--;
+
+ if (wg->refcnt == 0) {
+ mutex_exit(&wg->mtx);
+ aiowaitgroup_fini(wg);
+ } else {
+ mutex_exit(&wg->mtx);
+ }
+
+ return error;
+}
+
+int
+aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent,
+ struct timespec *ts)
+{
+ struct proc *p = l->l_proc;
+ struct aioproc *aio = p->p_aio;
+ struct aiosp *aiosp = &aio->aiosp;
+
+ return aiosp_suspend(aiosp, aiocbp_list, nent, ts, AIOSP_SUSPEND_ANY);
+}
+
+/*
+ * Initializes a servicing pool.
+ */
+int
+aiosp_initialize(struct aiosp *sp)
+{
+ mutex_init(&sp->mtx, MUTEX_DEFAULT, IPL_NONE);
+ TAILQ_INIT(&sp->freelist);
+ TAILQ_INIT(&sp->active);
+ TAILQ_INIT(&sp->jobs);
+ sp->fg_root = kmem_zalloc(sizeof(*sp->fg_root), KM_SLEEP);
+ RB_INIT(sp->fg_root);
+
+ return 0;
+}
+
+/*
+ * Extract an available worker thread from the pool, or create a new one.
+ */
+static int
+aiosp_worker_extract(struct aiosp *sp, struct aiost **aiost)
+{
+ int error;
+
+ if (sp->nthreads_free == 0) {
+ error = aiost_create(sp, aiost);
+ if (error) {
+ return error;
+ }
+ } else {
+ *aiost = TAILQ_LAST(&sp->freelist, aiost_list);
+ }
+
+ TAILQ_REMOVE(&sp->freelist, *aiost, list);
+ sp->nthreads_free--;
+ TAILQ_INSERT_TAIL(&sp->active, *aiost, list);
+ sp->nthreads_active++;
+
+ return 0;
+}
+
+/*
+ * Each process keeps track of all the service threads instantiated to service
+ * an asynchronous operation by the process. When a process is terminated we
+ * must also terminate all of its active and pending asynchronous operations.
+ */
+int
+aiosp_destroy(struct aiosp *sp)
+{
+ struct aiost *st;
+ struct aiost *tmp;
+ int error = 0;
+
+ mutex_enter(&sp->mtx);
+
+ /*
+ * Terminate and destroy every service thread, both free and active.
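+ * aiost_terminate() signals each worker and joins it with
+ * kthread_join(), so no worker can still reference sp afterwards.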
+ */ + TAILQ_FOREACH_SAFE(st, &sp->freelist, list, tmp) { + error = aiost_terminate(st); + if (error) { + mutex_exit(&sp->mtx); + return error; + } + + kmem_free(st, sizeof(*st)); + } + + TAILQ_FOREACH_SAFE(st, &sp->active, list, tmp) { + error = aiost_terminate(st); + if (error) { + mutex_exit(&sp->mtx); + return error; + } + + kmem_free(st, sizeof(*st)); + } + + mutex_exit(&sp->mtx); + mutex_destroy(&sp->mtx); + + return 0; +} + +/* + * Enqueue a job for processing by the process's servicing pool + */ +int +aiosp_enqueue_job(struct aiosp *aiosp, struct aio_job *job) +{ + mutex_enter(&aiosp->mtx); + + TAILQ_INSERT_TAIL(&aiosp->jobs, job, list); + aiosp->jobs_pending++; + + mutex_exit(&aiosp->mtx); + + return 0; +} + +/* + * Create and initialise a new servicing thread and append it to the freelist. + */ +static int +aiost_create(struct aiosp *sp, struct aiost **ret) +{ + struct proc *p = curlwp->l_proc; + struct aiost *st; + + st = kmem_zalloc(sizeof(*st), KM_SLEEP); + + mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); + cv_init(&st->service_cv, "aioservice"); + + st->job = NULL; + st->state = AIOST_STATE_NONE; + st->aiosp = sp; + st->freelist = true; + + TAILQ_INSERT_TAIL(&sp->freelist, st, list); + sp->nthreads_free++; + sp->nthreads_total++; + + int error = kthread_create(PRI_USER, 0, NULL, aiost_entry, + st, &st->lwp, "aio_%d_%ld", p->p_pid, sp->nthreads_total); + if (error) { + return error; + } + + if (ret) { + *ret = st; + } + + return 0; +} + +/* + * Process single job without coalescing. + */ +static void +aiost_process_singleton (struct aiost *st) +{ + struct aio_job *job; + + job = st->job; + KASSERT(job != NULL); + if (job->aio_op & AIO_READ) { + io_read_fallback(job); + } else if (job->aio_op & AIO_WRITE) { + io_write_fallback(job); + } else if (job->aio_op & AIO_SYNC) { + io_sync(st); + } else { + panic("aio_process: invalid operation code\n"); + } + + mutex_enter(&job->mtx); + aiowaitgrouplk_flush(&job->lk); + job->completed = true; + mutex_exit(&job->mtx); + + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); +} + +/* + * Process all jobs in a file group. + */ +static void +aiost_process_fg (struct aiost *st) +{ + struct aiosp *sp = st->aiosp; + struct aiost_file_group *fg = st->fg; + struct aio_job *job; + + struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { + if (job->aio_op & AIO_READ) { + io_read(st, job); + } else if (job->aio_op & AIO_WRITE) { + io_write(st, job); + } else if (job->aio_op & AIO_SYNC) { + io_sync(st); + } else { + panic("aio_process: invalid operation code\n"); + } + + mutex_enter(&job->mtx); + job->completed = true; + mutex_exit(&job->mtx); + + aiowaitgrouplk_flush(&job->lk); + + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + } + + mutex_enter(&sp->mtx); + RB_REMOVE(aiost_file_tree, sp->fg_root, fg); + closef(fg->fp); + kmem_free(fg, sizeof(*fg)); + mutex_exit(&sp->mtx); +} + +/* + * Service thread entry point. Processes assigned jobs until termination. + * Handles both singleton jobs and file-grouped job batches. + */ +static void +aiost_entry(void *arg) +{ + struct aiost *st = arg; + struct aiosp *sp = st->aiosp; + int error; + + /* + * We want to handle abrupt process terminations effectively. We use + * st->exit to indicate that the thread must exit. 
When a thread is
+ * terminated, aiost_terminate(st) unblocks those sleeping on
+ * st->service_cv.
+ */
+ mutex_enter(&st->mtx);
+ for(;;) {
+ for (; st->state == AIOST_STATE_NONE;) {
+ error = cv_wait_sig(&st->service_cv, &st->mtx);
+ if (error) {
+ /*
+ * Thread was interrupted. Check for a
+ * pending exit or suspension.
+ */
+ mutex_exit(&st->mtx);
+ lwp_userret(curlwp);
+ mutex_enter(&st->mtx);
+ }
+ }
+
+ if (st->state == AIOST_STATE_TERMINATE) {
+ break;
+ }
+
+ if (st->state != AIOST_STATE_OPERATION) {
+ panic("aio_process: invalid aiost state {%x}\n",
+ st->state);
+ }
+
+ if (st->fg) {
+ mutex_exit(&st->mtx);
+ aiost_process_fg(st);
+ mutex_enter(&st->mtx);
+ } else {
+ mutex_exit(&st->mtx);
+ aiost_process_singleton(st);
+ mutex_enter(&st->mtx);
+ }
+
+ st->state = AIOST_STATE_NONE;
+ st->job = NULL;
+ st->fg = NULL;
+
+ /*
+ * Remove st from the list of active service threads, append it
+ * to the freelist, dance around the locks, then iterate the
+ * loop and block on st->service_cv.
+ */
+ mutex_exit(&st->mtx);
+ mutex_enter(&sp->mtx);
+ mutex_enter(&st->mtx);
+
+ st->freelist = true;
+
+ TAILQ_REMOVE(&sp->active, st, list);
+ sp->nthreads_active--;
+
+ TAILQ_INSERT_TAIL(&sp->freelist, st, list);
+ sp->nthreads_free++;
+
+ mutex_exit(&sp->mtx);
+ }
+
+ mutex_exit(&st->mtx);
+ mutex_enter(&sp->mtx);
+
+ if (st->freelist) {
+ TAILQ_REMOVE(&sp->freelist, st, list);
+ sp->nthreads_free--;
+ } else {
+ TAILQ_REMOVE(&sp->active, st, list);
+ sp->nthreads_active--;
+ }
+ sp->nthreads_total--;
+
+ mutex_exit(&sp->mtx);
+ kthread_exit(0);
+}
+
+/*
+ * Send an AIO signal.
+ */
+static void
+aiost_sigsend(struct proc *p, struct sigevent *sig)
+{
+ ksiginfo_t ksi;
+
+ if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE)
+ return;
+
+ KSI_INIT(&ksi);
+ ksi.ksi_signo = sig->sigev_signo;
+ ksi.ksi_code = SI_ASYNCIO;
+ ksi.ksi_value = sig->sigev_value;
+
+ mutex_enter(&proc_lock);
+ kpsignal(p, &ksi, NULL);
+ mutex_exit(&proc_lock);
+}
+
+/*
+ * Process write operation for non-blocking jobs.
+ */
+static int
+io_write(struct aiost *aiost, struct aio_job *job)
+{
+ return io_write_fallback(job);
+}
+
+/*
+ * Process read operation for non-blocking jobs.
+ */
+static int
+io_read(struct aiost *aiost, struct aio_job *job)
+{
+ return io_read_fallback(job);
+}
+
+/*
+ * Initialize UIO structure for I/O operation.
+ */
+static int
+uio_construct(struct aio_job *job, struct file **fp, struct iovec *aiov,
+ struct uio *auio)
+{
+ struct aiocb *aiocbp = &job->aiocbp;
+ int fd = aiocbp->aio_fildes;
+ int error = 0;
+
+ if (aiocbp->aio_nbytes > SSIZE_MAX) {
+ error = SET_ERROR(EINVAL);
+ return error;
+ }
+
+ *fp = fd_getfile2(job->p, fd);
+ if (*fp == NULL) {
+ error = SET_ERROR(EBADF);
+ return error;
+ }
+
+ aiov->iov_base = aiocbp->aio_buf;
+ aiov->iov_len = aiocbp->aio_nbytes;
+ auio->uio_iov = aiov;
+ auio->uio_iovcnt = 1;
+ auio->uio_resid = aiocbp->aio_nbytes;
+ auio->uio_offset = aiocbp->aio_offset;
+ auio->uio_vmspace = job->p->p_vmspace;
+
+ return 0;
+}
+
+/*
+ * Perform synchronous write via file operations.
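+ *
+ * Worked example (illustrative): a 4096-byte request that transfers in
+ * full leaves auio.uio_resid at 0, so aio_nbytes remains 4096 and _retval
+ * is set to 4096; on failure _retval is -1 and _errno holds the error.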
+ */ +static int +io_write_fallback(struct aio_job *job) +{ + struct file *fp; + struct iovec aiov; + struct uio auio; + struct aiocb *aiocbp; + int error; + + error = uio_construct(job, &fp, &aiov, &auio); + if (error) { + if (fp) { + closef(fp); + } + + goto done; + } + + /* + * Perform write + */ + aiocbp = &job->aiocbp; + KASSERT(job->aio_op & AIO_WRITE); + + if ((fp->f_flag & FWRITE) == 0) { + closef(fp); + error = SET_ERROR(EBADF); + goto done; + } + auio.uio_rw = UIO_WRITE; + error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, + &auio, fp->f_cred, FOF_UPDATE_OFFSET); + + closef(fp); + + /* + * Store the result value + */ + job->aiocbp.aio_nbytes -= auio.uio_resid; + job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; +done: + job->aiocbp._errno = error; + job->aiocbp._state = JOB_DONE; + + return 0; +} + +/* + * Perform synchronous read via file operations. + */ +static int +io_read_fallback(struct aio_job *job) +{ + struct file *fp; + struct iovec aiov; + struct uio auio; + struct aiocb *aiocbp; + int error; + + error = uio_construct(job, &fp, &aiov, &auio); + if (error) { + if (fp) { + closef(fp); + } + goto done; + } + + /* + * Perform read + */ + aiocbp = &job->aiocbp; + KASSERT((job->aio_op & AIO_WRITE) == 0); + + if ((fp->f_flag & FREAD) == 0) { + closef(fp); + error = SET_ERROR(EBADF); + goto done; + } + auio.uio_rw = UIO_READ; + error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, + &auio, fp->f_cred, FOF_UPDATE_OFFSET); + + closef(fp); + + /* + * Store the result value + */ + job->aiocbp.aio_nbytes -= auio.uio_resid; + job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; +done: + job->aiocbp._errno = error; + job->aiocbp._state = JOB_DONE; + + return 0; +} + +/* + * Flush file data to stable storage. + */ +static int +io_sync(struct aiost *aiost) +{ + struct aio_job *job = aiost->job; + struct aiocb *aiocbp = &job->aiocbp; + struct file *fp; + int fd = aiocbp->aio_fildes; + int error = 0; + + /* + * Perform a file sync operation + */ + struct vnode *vp; + + if ((error = fd_getvnode(fd, &fp)) != 0) { + goto done; + } + + if ((fp->f_flag & FWRITE) == 0) { + fd_putfile(fd); + error = SET_ERROR(EBADF); + goto done; + } + + vp = fp->f_vnode; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + if (job->aio_op & AIO_DSYNC) { + error = VOP_FSYNC(vp, fp->f_cred, + FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); + } else if (job->aio_op & AIO_SYNC) { + error = VOP_FSYNC(vp, fp->f_cred, + FSYNC_WAIT, 0, 0); + } + VOP_UNLOCK(vp); + fd_putfile(fd); + + /* + * Store the result value + */ + job->aiocbp._retval = (error == 0) ? 0 : -1; +done: + job->aiocbp._errno = error; + job->aiocbp._state = JOB_DONE; + + copyout(&job->aiocbp, job->aiocb_uptr, + sizeof(struct aiocb)); + + return 0; +} + +/* + * Destroy a servicing thread. Set st->exit high such that when we unblock the + * thread blocking on st->service_cv it will invoke an exit routine within + * aiost_entry. + */ +static int +aiost_terminate(struct aiost *st) +{ + int error = 0; + + mutex_enter(&st->mtx); + + st->state = AIOST_STATE_TERMINATE; + + mutex_exit(&st->mtx); + + cv_signal(&st->service_cv); + kthread_join(st->lwp); + + cv_destroy(&st->service_cv); + mutex_destroy(&st->mtx); + + return error; +} + +/* + * Ensure that the same job can not be enqueued twice. 
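+ *
+ * e.g. (hypothetical userspace view):
+ *
+ *	aio_read(&cb);		first submission is accepted
+ *	aio_read(&cb);		EINVAL while the first is still in flight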
+ */ +int +aiosp_validate_conflicts(struct aiosp *aiosp, const void *uptr) +{ + struct aiost *st; + + mutex_enter(&aiosp->mtx); + + /* check active threads */ + TAILQ_FOREACH(st, &aiosp->active, list) { + KASSERT(st->job); + if (st->job->aiocb_uptr == uptr) { + mutex_exit(&aiosp->mtx); + return EINVAL; + } + } + + /* no need to check freelist threads as they have no jobs */ + + mutex_exit(&aiosp->mtx); + return 0; +} + +/* + * Get error status of async I/O operation + */ +int aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval) +{ + struct aiocbp *aiocbp = NULL; + struct aio_job *job; + int error; + + error = aiocbp_lookup(aiosp, &aiocbp, uptr); + if (error) { + return error; + } + + job = aiocbp->job; + if (job->aiocbp._state == JOB_NONE) { + return SET_ERROR(EINVAL); + } + + *retval = job->aiocbp._errno; + + return error; +} + +/* + * Get return value of completed async I/O operation + */ +int aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) +{ + struct aiocbp *aiocbp = NULL; + struct aio_job *job; + int error; + + error = aiocbp_lookup(aiosp, &aiocbp, uptr); + if (error) { + return error; + } + job = aiocbp->job; + + if (job->aiocbp._errno == EINPROGRESS || job->aiocbp._state != JOB_DONE) { + return SET_ERROR(EINVAL); + } + + *retval = job->aiocbp._retval; + + job->aiocbp._errno = 0; + job->aiocbp._retval = -1; + job->aiocbp._state = JOB_NONE; + + return 0; +} + +/* + * Hash function for aiocb user pointers. + */ +static inline u_int +aiocbp_hash(const void *uptr) +{ + return hash32_buf(&uptr, sizeof(uptr), HASH32_BUF_INIT); +} + +/* + * Find aiocb entry by user pointer. + */ +int +aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) +{ + struct aiocbp *aiocbp; + u_int hash; + + hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; + + mutex_enter(&aiosp->aio_hash_mtx); + TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { + if (aiocbp->uptr == uptr) { + *aiocbpp = aiocbp; + mutex_exit(&aiosp->aio_hash_mtx); + return 0; + } + } + mutex_exit(&aiosp->aio_hash_mtx); + + return ENOENT; +} + +/* + * Remove aiocb entry from hash table. + */ +int +aiocbp_remove(struct aiosp *aiosp, const void *uptr) +{ + struct aiocbp *aiocbp; + u_int hash; + + hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; + + struct aiocbp *tmp; + mutex_enter(&aiosp->aio_hash_mtx); + TAILQ_FOREACH_SAFE(aiocbp, &aiosp->aio_hash[hash], list, tmp) { + if (aiocbp->uptr == uptr) { + TAILQ_REMOVE(&aiosp->aio_hash[hash], aiocbp, list); + mutex_exit(&aiosp->aio_hash_mtx); + return 0; + } + } + mutex_exit(&aiosp->aio_hash_mtx); + + return ENOENT; +} + +/* + * Insert aiocb entry into hash table. + */ +int +aiocbp_insert(struct aiosp *aiosp, struct aiocbp *aiocbp) +{ + struct aiocbp *found; + const void *uptr; + u_int hash; + + uptr = aiocbp->uptr; + hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; + + mutex_enter(&aiosp->aio_hash_mtx); + TAILQ_FOREACH(found, &aiosp->aio_hash[hash], list) { + if (found->uptr == uptr) { + found->job = aiocbp->job; + mutex_exit(&aiosp->aio_hash_mtx); + return EEXIST; + } + } + + TAILQ_INSERT_HEAD(&aiosp->aio_hash[hash], aiocbp, list); + mutex_exit(&aiosp->aio_hash_mtx); + + return 0; +} + +/* + * Initialize aiocb hash table. 
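+ * The table size must be a power of two, e.g. aiocbp_init(sp, 256)
+ * yields 256 buckets with aio_hash_mask == 0xff.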
*/ -static void -aio_worker(void *arg) +int +aiocbp_init(struct aiosp *aiosp, u_int hashsize) { - struct proc *p = curlwp->l_proc; - struct aioproc *aio = p->p_aio; - struct aio_job *a_job; - struct lio_req *lio; - sigset_t oss, nss; - int error __diagused, refcnt; + if (!powerof2(hashsize)) { + return EINVAL; + } - /* - * Make an empty signal mask, so it - * handles only SIGKILL and SIGSTOP. - */ - sigfillset(&nss); - mutex_enter(p->p_lock); - error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss); - mutex_exit(p->p_lock); - KASSERT(error == 0); + aiosp->aio_hash = kmem_zalloc(hashsize * sizeof(*aiosp->aio_hash), + KM_SLEEP); - for (;;) { - /* - * Loop for each job in the queue. If there - * are no jobs then sleep. - */ - mutex_enter(&aio->aio_mtx); - while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) { - if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) { - /* - * Thread was interrupted - check for - * pending exit or suspend. - */ - mutex_exit(&aio->aio_mtx); - lwp_userret(curlwp); - mutex_enter(&aio->aio_mtx); - } - } + aiosp->aio_hash_mask = hashsize - 1; + aiosp->aio_hash_size = hashsize; - /* Take the job from the queue */ - aio->curjob = a_job; - TAILQ_REMOVE(&aio->jobs_queue, a_job, list); + mutex_init(&aiosp->aio_hash_mtx, MUTEX_DEFAULT, IPL_NONE); - atomic_dec_uint(&aio_jobs_count); - aio->jobs_count--; + for (size_t i = 0; i < hashsize; i++) { + TAILQ_INIT(&aiosp->aio_hash[i]); + } - mutex_exit(&aio->aio_mtx); + return 0; +} - /* Process an AIO operation */ - aio_process(a_job); +/* + * Destroy aiocb hash table and free entries. + */ +void +aiocbp_destroy(struct aiosp *aiosp) +{ + if (aiosp->aio_hash == NULL) { + return; + } - /* Copy data structure back to the user-space */ - (void)copyout(&a_job->aiocbp, a_job->aiocb_uptr, - sizeof(struct aiocb)); + struct aiocbp *aiocbp; - mutex_enter(&aio->aio_mtx); - KASSERT(aio->curjob == a_job); - aio->curjob = NULL; + mutex_enter(&aiosp->aio_hash_mtx); + for (size_t i = 0; i < aiosp->aio_hash_size; i++) { + struct aiocbp *tmp; + TAILQ_FOREACH_SAFE(aiocbp, &aiosp->aio_hash[i], list, tmp) { + TAILQ_REMOVE(&aiosp->aio_hash[i], aiocbp, list); + kmem_free(aiocbp, sizeof(*aiocbp)); + } + } - /* Decrease a reference counter, if there is a LIO structure */ - lio = a_job->lio; - refcnt = (lio != NULL ? --lio->refcnt : -1); + kmem_free(aiosp->aio_hash, + aiosp->aio_hash_size * sizeof(*aiosp->aio_hash)); + aiosp->aio_hash = NULL; + aiosp->aio_hash_mask = 0; + aiosp->aio_hash_size = 0; + mutex_exit(&aiosp->aio_hash_mtx); +} - /* Notify all suspenders */ - cv_broadcast(&aio->done_cv); - mutex_exit(&aio->aio_mtx); +/* + * Initialize wait group for suspend operations. + */ +void +aiowaitgroup_init(struct aiowaitgroup *wg) +{ + wg->completed = 0; + wg->total = 0; + wg->refcnt = 1; + wg->active = true; + cv_init(&wg->done_cv, "aiodone"); + mutex_init(&wg->mtx, MUTEX_DEFAULT, IPL_NONE); +} - /* Send a signal, if any */ - aio_sendsig(p, &a_job->aiocbp.aio_sigevent); +/* + * Clean up wait group resources. + */ +void +aiowaitgroup_fini(struct aiowaitgroup *wg) +{ + cv_destroy(&wg->done_cv); + mutex_destroy(&wg->mtx); + kmem_free(wg, sizeof(*wg)); +} - /* Destroy the LIO structure */ - if (refcnt == 0) { - aio_sendsig(p, &lio->sig); - pool_put(&aio_lio_pool, lio); +/* + * Block until wait group signals completion. 
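+ * Called with wg->mtx held. A timo of 0 sleeps until signalled; otherwise
+ * timo is in ticks and a timeout is reported as EAGAIN.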
+ */ +int +aiowaitgroup_wait(struct aiowaitgroup *wg, int timo) +{ + int error; + + error = cv_timedwait_sig(&wg->done_cv, &wg->mtx, timo); + if (error) { + if (error == EWOULDBLOCK) { + error = SET_ERROR(EAGAIN); } - - mutex_destroy(&a_job->mtx); -#ifdef AIOSP - aiowaitgrouplk_fini(&a_job->lk); -#endif - /* Destroy the job */ - pool_put(&aio_job_pool, a_job); + return error; } - /* NOTREACHED */ + return 0; } -static void -aio_process(struct aio_job *a_job) +/* + * Initialize wait group link for job tracking. + */ +void +aiowaitgrouplk_init(struct aiowaitgrouplk *lk) { - struct proc *p = curlwp->l_proc; - struct aiocb *aiocbp = &a_job->aiocbp; - struct file *fp; - int fd = aiocbp->aio_fildes; - int error = 0; - - KASSERT(a_job->aio_op != 0); - - if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) { - struct iovec aiov; - struct uio auio; + mutex_init(&lk->mtx, MUTEX_DEFAULT, IPL_NONE); + lk->n = 0; + lk->s = 2; + lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); +} - if (aiocbp->aio_nbytes > SSIZE_MAX) { - error = SET_ERROR(EINVAL); - goto done; - } +/* + * Clean up wait group link resources. + */ +void +aiowaitgrouplk_fini(struct aiowaitgrouplk *lk) +{ + mutex_destroy(&lk->mtx); - fp = fd_getfile(fd); - if (fp == NULL) { - error = SET_ERROR(EBADF); - goto done; - } + if (lk->s) { + kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); + } +} - aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf; - aiov.iov_len = aiocbp->aio_nbytes; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_resid = aiocbp->aio_nbytes; - auio.uio_vmspace = p->p_vmspace; - - if (a_job->aio_op & AIO_READ) { - /* - * Perform a Read operation - */ - KASSERT((a_job->aio_op & AIO_WRITE) == 0); - - if ((fp->f_flag & FREAD) == 0) { - fd_putfile(fd); - error = SET_ERROR(EBADF); - goto done; - } - auio.uio_rw = UIO_READ; - error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, - &auio, fp->f_cred, FOF_UPDATE_OFFSET); - } else { - /* - * Perform a Write operation - */ - KASSERT(a_job->aio_op & AIO_WRITE); - - if ((fp->f_flag & FWRITE) == 0) { - fd_putfile(fd); - error = SET_ERROR(EBADF); - goto done; - } - auio.uio_rw = UIO_WRITE; - error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, - &auio, fp->f_cred, FOF_UPDATE_OFFSET); +/* + * Notify all wait groups of job completion. + */ +void +aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) +{ + mutex_enter(&lk->mtx); + for (int i = 0; i < lk->n; i++) { + struct aiowaitgroup *wg = lk->wgs[i]; + if (wg == NULL) { + continue; } - fd_putfile(fd); - - /* Store the result value */ - a_job->aiocbp.aio_nbytes -= auio.uio_resid; - a_job->aiocbp._retval = (error == 0) ? - a_job->aiocbp.aio_nbytes : -1; - } else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) { - /* - * Perform a file Sync operation - */ - struct vnode *vp; - - if ((error = fd_getvnode(fd, &fp)) != 0) - goto done; + mutex_enter(&wg->mtx); - if ((fp->f_flag & FWRITE) == 0) { - fd_putfile(fd); - error = SET_ERROR(EBADF); - goto done; + if (wg->active) { + wg->completed++; + cv_signal(&wg->done_cv); } - vp = fp->f_vnode; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - if (a_job->aio_op & AIO_DSYNC) { - error = VOP_FSYNC(vp, fp->f_cred, - FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); - } else if (a_job->aio_op & AIO_SYNC) { - error = VOP_FSYNC(vp, fp->f_cred, - FSYNC_WAIT, 0, 0); + if (--wg->refcnt == 0) { + mutex_exit(&wg->mtx); + aiowaitgroup_fini(wg); + } else { + mutex_exit(&wg->mtx); } - VOP_UNLOCK(vp); - fd_putfile(fd); + } - /* Store the result value */ - a_job->aiocbp._retval = (error == 0) ? 
0 : -1; + if (lk->n) { + kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); - } else - panic("aio_process: invalid operation code\n"); + lk->n = 0; + lk->s = 2; + lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); + } -done: - /* Job is done, set the error, if any */ - a_job->aiocbp._errno = error; - a_job->aiocbp._state = JOB_DONE; + mutex_exit(&lk->mtx); } /* - * Send AIO signal. + * Attach wait group to jobs notification list. */ -static void -aio_sendsig(struct proc *p, struct sigevent *sig) +void +aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) { - ksiginfo_t ksi; - - if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE) - return; + mutex_enter(&lk->mtx); + if (lk->n == lk->s) { + size_t new_size = lk->s * lk->s; - KSI_INIT(&ksi); - ksi.ksi_signo = sig->sigev_signo; - ksi.ksi_code = SI_ASYNCIO; - ksi.ksi_value = sig->sigev_value; - mutex_enter(&proc_lock); - kpsignal(p, &ksi, NULL); - mutex_exit(&proc_lock); -} + void **new_wgs = kmem_zalloc(new_size * + sizeof(*new_wgs), KM_SLEEP); -/* - * The same job can be enqueued twice. So ensure that it does not exist - */ -#ifndef AIOSP -static int -aio_validate_conflicts(struct aioproc *aio, void *uptr) -{ - mutex_enter(&aio->aio_mtx); + memcpy(new_wgs, lk->wgs, lk->n * sizeof(*lk->wgs)); + kmem_free(lk->wgs, lk->s * sizeof(*lk->wgs)); - struct aio_job *a_job; - TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { - if (a_job->aiocb_uptr != uptr) { - continue; - } - mutex_exit(&aio->aio_mtx); - return EINVAL; + lk->s = new_size; + lk->wgs = new_wgs; } - - mutex_exit(&aio->aio_mtx); - - return 0; + lk->wgs[lk->n] = wg; + lk->n++; + wg->total++; + wg->refcnt++; + mutex_exit(&lk->mtx); } -#endif /* * Enqueue the job. @@ -543,7 +1435,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) /* Check if signal is set, and validate it */ sig = &aiocb.aio_sigevent; if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG || - sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA) + sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA) return SET_ERROR(EINVAL); /* Buffer and byte count */ @@ -560,7 +1452,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) op = AIO_READ; else return (aiocb.aio_lio_opcode == LIO_NOP) ? 
0 : - SET_ERROR(EINVAL); + SET_ERROR(EINVAL); } else { KASSERT(lio == NULL); } @@ -571,11 +1463,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) */ aio = p->p_aio; if (aio) { -#ifdef AIOSP error = aiosp_validate_conflicts(&aio->aiosp, aiocb_uptr); -#else - error = aio_validate_conflicts(aio, aiocb_uptr); -#endif if (error) { return SET_ERROR(error); } @@ -615,9 +1503,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) a_job->aio_op |= op; a_job->lio = lio; mutex_init(&a_job->mtx, MUTEX_DEFAULT, IPL_NONE); -#ifdef AIOSP aiowaitgrouplk_init(&a_job->lk); -#endif /* * Add the job to the queue, update the counters, and @@ -627,14 +1513,13 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) /* Fail, if the limit was reached */ if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max || - aio->jobs_count >= aio_listio_max) { + aio->jobs_count >= aio_listio_max) { atomic_dec_uint(&aio_jobs_count); mutex_exit(&aio->aio_mtx); pool_put(&aio_job_pool, a_job); return SET_ERROR(EAGAIN); } -#ifdef AIOSP a_job->pri = PRI_KTHREAD; a_job->p = curlwp->l_proc; @@ -653,14 +1538,13 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) if (error) { return SET_ERROR(error); } -#else - TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list); + + mutex_enter(&aio->aio_mtx); aio->jobs_count++; if (lio) lio->refcnt++; mutex_exit(&aio->aio_mtx); - cv_signal(&aio->aio_worker_cv); -#endif + /* * One would handle the errors only with aio_error() function. * This way is appropriate according to POSIX. @@ -674,117 +1558,19 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) int sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, - register_t *retval) + register_t *retval) { /* { syscallarg(int) fildes; syscallarg(struct aiocb *) aiocbp; } */ - struct proc *p = l->l_proc; - struct aioproc *aio; - struct aio_job *a_job; - struct aiocb *aiocbp_ptr; - struct lio_req *lio; - struct filedesc *fdp = p->p_fd; - unsigned int cn, errcnt, fildes; - fdtab_t *dt; - - TAILQ_HEAD(, aio_job) tmp_jobs_list; - - /* Check for invalid file descriptor */ - fildes = (unsigned int)SCARG(uap, fildes); - dt = atomic_load_consume(&fdp->fd_dt); - if (fildes >= dt->dt_nfiles) - return SET_ERROR(EBADF); - if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL) - return SET_ERROR(EBADF); - - /* Check if AIO structure is initialized */ - if (p->p_aio == NULL) { - *retval = AIO_NOTCANCELED; - return 0; - } - - aio = p->p_aio; - aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp); - - mutex_enter(&aio->aio_mtx); - - /* Cancel the jobs, and remove them from the queue */ - cn = 0; - TAILQ_INIT(&tmp_jobs_list); - TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { - if (aiocbp_ptr) { - if (aiocbp_ptr != a_job->aiocb_uptr) - continue; - if (fildes != a_job->aiocbp.aio_fildes) { - mutex_exit(&aio->aio_mtx); - return SET_ERROR(EBADF); - } - } else if (a_job->aiocbp.aio_fildes != fildes) - continue; - - TAILQ_REMOVE(&aio->jobs_queue, a_job, list); - TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list); - - /* Decrease the counters */ - atomic_dec_uint(&aio_jobs_count); - aio->jobs_count--; - lio = a_job->lio; - if (lio != NULL && --lio->refcnt != 0) - a_job->lio = NULL; - - cn++; - if (aiocbp_ptr) - break; - } - - /* There are canceled jobs */ - if (cn) - *retval = AIO_CANCELED; - - /* We cannot cancel current job */ - a_job = aio->curjob; - if (a_job && ((a_job->aiocbp.aio_fildes == fildes) || - (a_job->aiocb_uptr == aiocbp_ptr))) - *retval = AIO_NOTCANCELED; - - 
mutex_exit(&aio->aio_mtx); - - /* Free the jobs after the lock */ - errcnt = 0; - while (!TAILQ_EMPTY(&tmp_jobs_list)) { - a_job = TAILQ_FIRST(&tmp_jobs_list); - TAILQ_REMOVE(&tmp_jobs_list, a_job, list); - /* Set the errno and copy structures back to the user-space */ - a_job->aiocbp._errno = SET_ERROR(ECANCELED); - a_job->aiocbp._state = JOB_DONE; - if (copyout(&a_job->aiocbp, a_job->aiocb_uptr, - sizeof(struct aiocb))) - errcnt++; - /* Send a signal if any */ - aio_sendsig(p, &a_job->aiocbp.aio_sigevent); - if (a_job->lio) { - lio = a_job->lio; - aio_sendsig(p, &lio->sig); - pool_put(&aio_lio_pool, lio); - } - pool_put(&aio_job_pool, a_job); - } - - if (errcnt) - return SET_ERROR(EFAULT); - - /* Set a correct return value */ - if (*retval == 0) - *retval = AIO_ALLDONE; return 0; } int sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap, - register_t *retval) + register_t *retval) { /* { syscallarg(const struct aiocb *) aiocbp; @@ -795,32 +1581,13 @@ sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap, if (aio == NULL) return SET_ERROR(EINVAL); -#ifdef AIOSP const void *uptr = SCARG(uap, aiocbp); return aiosp_error(&aio->aiosp, uptr, retval); -#else - struct aiocb aiocbp; - int error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); - if (error) { - printf("WHAT!\n"); - return error; - } - - if (aiocbp._state == JOB_NONE) { - printf("WHA!T!\n"); - return SET_ERROR(EINVAL); - } - - *retval = aiocbp._errno; - - printf("WHY!!\n"); - return 0; -#endif } int sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap, - register_t *retval) + register_t *retval) { /* { syscallarg(int) op; @@ -838,21 +1605,19 @@ sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap, int sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap, - register_t *retval) + register_t *retval) { int error; error = aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); -#ifdef AIOSP struct proc *p = curlwp->l_proc; struct aioproc *aio = p->p_aio; error = aiosp_distribute_jobs(&aio->aiosp); -#endif return error; } int sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, - register_t *retval) + register_t *retval) { /* { syscallarg(struct aiocb *) aiocbp; @@ -864,38 +1629,13 @@ sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, return SET_ERROR(EINVAL); } -#ifdef AIOSP const void *uptr = SCARG(uap, aiocbp); return aiosp_return(&aio->aiosp, uptr, retval); -#else - struct aiocb aiocbp; - int error; - error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb)); - if (error) { - return error; - } - - printf("WHAT!\n"); - - if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE) { - return SET_ERROR(EINVAL); - } - - *retval = aiocbp._retval; - - /* Reset the internal variables */ - aiocbp._errno = 0; - aiocbp._retval = -1; - aiocbp._state = JOB_NONE; - error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb)); - - return error; -#endif } int sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, - register_t *retval) + register_t *retval) { /* { syscallarg(const struct aiocb *const[]) list; @@ -913,7 +1653,7 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, if (SCARG(uap, timeout)) { /* Convert timespec to ticks */ error = copyin(SCARG(uap, timeout), &ts, - sizeof(struct timespec)); + sizeof(struct timespec)); if (error) return error; } @@ -922,108 +1662,33 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, error = 
copyin(SCARG(uap, list), list, nent * sizeof(*list)); if (error) goto out; -#ifdef AIOSP + struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; KASSERT(aio); error = aiosp_suspend(&aio->aiosp, list, nent, SCARG(uap, timeout) ? &ts : NULL, AIOSP_SUSPEND_ALL); -#else - error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL); -#endif out: kmem_free(list, nent * sizeof(*list)); return error; } -int -aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent, - struct timespec *ts) -{ - struct proc *p = l->l_proc; - struct aioproc *aio; - struct aio_job *a_job; - int i, error, timo; - - if (p->p_aio == NULL) - return SET_ERROR(EAGAIN); - aio = p->p_aio; - - if (ts) { - timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); - if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0) - timo = 1; - if (timo <= 0) - return SET_ERROR(EAGAIN); - } else - timo = 0; - - mutex_enter(&aio->aio_mtx); - for (;;) { - for (i = 0; i < nent; i++) { - - /* Skip NULL entries */ - if (aiocbp_list[i] == NULL) - continue; - - /* Skip current job */ - if (aio->curjob) { - a_job = aio->curjob; - if (a_job->aiocb_uptr == aiocbp_list[i]) - continue; - } - - /* Look for a job in the queue */ - TAILQ_FOREACH(a_job, &aio->jobs_queue, list) - if (a_job->aiocb_uptr == aiocbp_list[i]) - break; - - if (a_job == NULL) { - struct aiocb aiocbp; - - mutex_exit(&aio->aio_mtx); - - /* Check if the job is done. */ - error = copyin(aiocbp_list[i], &aiocbp, - sizeof(struct aiocb)); - if (error == 0 && aiocbp._state != JOB_DONE) { - mutex_enter(&aio->aio_mtx); - continue; - } - return error; - } - } - - /* Wait for a signal or when timeout occurs */ - error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo); - if (error) { - if (error == EWOULDBLOCK) - error = SET_ERROR(EAGAIN); - break; - } - } - mutex_exit(&aio->aio_mtx); - return error; -} - int sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap, - register_t *retval) + register_t *retval) { int error; error = aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL); -#ifdef AIOSP struct proc *p = curlwp->l_proc; struct aioproc *aio = p->p_aio; KASSERT(aio); error = aiosp_distribute_jobs(&aio->aiosp); -#endif return error; } int sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, - register_t *retval) + register_t *retval) { /* { syscallarg(int) mode; @@ -1067,12 +1732,12 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, struct sigevent *sig = &lio->sig; error = copyin(SCARG(uap, sig), &lio->sig, - sizeof(struct sigevent)); + sizeof(struct sigevent)); if (error == 0 && - (sig->sigev_signo < 0 || - sig->sigev_signo >= NSIG || - sig->sigev_notify < SIGEV_NONE || - sig->sigev_notify > SIGEV_SA)) + (sig->sigev_signo < 0 || + sig->sigev_signo >= NSIG || + sig->sigev_notify < SIGEV_NONE || + sig->sigev_notify > SIGEV_SA)) error = SET_ERROR(EINVAL); } else memset(&lio->sig, 0, sizeof(struct sigevent)); @@ -1090,7 +1755,7 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, /* Get the list from user-space */ aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP); error = copyin(SCARG(uap, list), aiocbp_list, - nent * sizeof(*aiocbp_list)); + nent * sizeof(*aiocbp_list)); if (error) { mutex_enter(&aio->aio_mtx); goto err; @@ -1108,12 +1773,10 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, errcnt++; } -#ifdef AIOSP error = aiosp_distribute_jobs(&aio->aiosp); if (error) { return error; } -#endif mutex_enter(&aio->aio_mtx); @@ -1124,14 +1787,7 @@ 
sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, } if (mode == LIO_WAIT) { - /* - * Wait for AIO completion. In such case, - * the LIO structure will be freed here. - */ - while (lio->refcnt > 1 && error == 0) - error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx); - if (error) - error = SET_ERROR(EINTR); + // IMPLEMENT THIS } err: @@ -1139,7 +1795,7 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, lio = NULL; mutex_exit(&aio->aio_mtx); if (lio != NULL) { - aio_sendsig(p, &lio->sig); + aiost_sigsend(p, &lio->sig); pool_put(&aio_lio_pool, lio); } kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list)); @@ -1197,35 +1853,35 @@ SYSCTL_SETUP(sysctl_aio_init, "aio sysctl") int rv; rv = sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, - CTLTYPE_INT, "posix_aio", - SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " + CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, + CTLTYPE_INT, "posix_aio", + SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " "Asynchronous I/O option to which the " "system attempts to conform"), - NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0, - CTL_KERN, CTL_CREATE, CTL_EOL); + NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); if (rv != 0) return; rv = sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT | CTLFLAG_READWRITE, - CTLTYPE_INT, "aio_listio_max", - SYSCTL_DESCR("Maximum number of asynchronous I/O " + CTLFLAG_PERMANENT | CTLFLAG_READWRITE, + CTLTYPE_INT, "aio_listio_max", + SYSCTL_DESCR("Maximum number of asynchronous I/O " "operations in a single list I/O call"), - sysctl_aio_listio_max, 0, &aio_listio_max, 0, - CTL_KERN, CTL_CREATE, CTL_EOL); + sysctl_aio_listio_max, 0, &aio_listio_max, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); if (rv != 0) return; rv = sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT | CTLFLAG_READWRITE, - CTLTYPE_INT, "aio_max", - SYSCTL_DESCR("Maximum number of asynchronous I/O " + CTLFLAG_PERMANENT | CTLFLAG_READWRITE, + CTLTYPE_INT, "aio_max", + SYSCTL_DESCR("Maximum number of asynchronous I/O " "operations"), - sysctl_aio_max, 0, &aio_max, 0, - CTL_KERN, CTL_CREATE, CTL_EOL); + sysctl_aio_max, 0, &aio_max, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); return; } @@ -1237,47 +1893,5 @@ SYSCTL_SETUP(sysctl_aio_init, "aio sysctl") void aio_print_jobs(void (*pr)(const char *, ...)) { - struct proc *p = curlwp->l_proc; - struct aioproc *aio; - struct aio_job *a_job; - struct aiocb *aiocbp; - - if (p == NULL) { - (*pr)("AIO: We are not in the processes right now.\n"); - return; - } - - aio = p->p_aio; - if (aio == NULL) { - (*pr)("AIO data is not initialized (PID = %d).\n", p->p_pid); - return; - } - - (*pr)("AIO: PID = %d\n", p->p_pid); - (*pr)("AIO: Global count of the jobs = %u\n", aio_jobs_count); - (*pr)("AIO: Count of the jobs = %u\n", aio->jobs_count); - - if (aio->curjob) { - a_job = aio->curjob; - (*pr)("\nAIO current job:\n"); - (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n", - a_job->aio_op, a_job->aiocbp._errno, - a_job->aiocbp._state, a_job->aiocb_uptr); - aiocbp = &a_job->aiocbp; - (*pr)(" fd = %d, offset = %u, buf = %p, nbytes = %u\n", - aiocbp->aio_fildes, aiocbp->aio_offset, - aiocbp->aio_buf, aiocbp->aio_nbytes); - } - - (*pr)("\nAIO queue:\n"); - TAILQ_FOREACH(a_job, &aio->jobs_queue, list) { - (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n", - a_job->aio_op, a_job->aiocbp._errno, - a_job->aiocbp._state, a_job->aiocb_uptr); - aiocbp = &a_job->aiocbp; - (*pr)(" fd = %d, offset = %u, buf = %p, nbytes = %u\n", - aiocbp->aio_fildes, 
aiocbp->aio_offset, - aiocbp->aio_buf, aiocbp->aio_nbytes); - } } #endif /* defined(DDB) */ diff --git a/sys/kern/sys_aiosp.c b/sys/kern/sys_aiosp.c deleted file mode 100644 index 3b1c67fc862b2..0000000000000 --- a/sys/kern/sys_aiosp.c +++ /dev/null @@ -1,1251 +0,0 @@ -/* $NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp $ */ - -/* - * Copyright (c) 2025 The NetBSD Foundation, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * NetBSD asynchronous I/O service pool implementation - * - * Design overview - * - * Thread pool architecture: - * Each process maintains an aiosp (service pool) with worker threads (aiost) - * Workers are recycled via freelist/active lists to minimize thread creation - * Workers sleep on service_cv until jobs are assigned - * On process termination, all associated service threads are terminated - * - * Job distribution strategy: - * Jobs are initially queued to aiosp->jobs pending distribution - * Regular files: Jobs are grouped by file descriptor for potential coalescing - * Multiple jobs on same fp are assigned to one thread via aiost_file_group - * Enables future optimizations like request merging and vectored I/O - * Nonregular files: Each job gets a dedicated worker (no coalescing) - * Distribution occurs when aiosp_distribute_jobs() is called - * - * Job tracking: - * Hash table (aiocbp_hash) maps userspace aiocb pointers to kernel jobs - * Prevents duplicate submissions of same aiocb - * Enables O(1) lookup for aio_error/aio_return/aio_suspend operations - * Hash collision resolution via chaining (TAILQ per bucket) - * - * Completion notification: - * Twophase notification: waitgroup signaling then signal delivery - * Aiowaitgrouplk attached to each job tracks all waiting suspend operations - * On completion, all registered waitgroups are notified atomically - * Supports both any (wake on first completion) and all (wake when all done) modes - * Waitgroups are reference counted to handle concurrent completion/registration - * - * Thread lifecycle: - * Threads handle both singleton jobs and filegrouped batches - * After processing, threads return to freelist for reuse - * Thread termination uses state machine (none->operation->terminate) - * Abrupt process termination handled via 
signal checks in cv_wait_sig() - * - * Synchronization model: - * Hierarchical locking: aiosp->mtx > aiost->mtx > job->mtx - * aiosp->mtx: Protects job queues, thread lists, and file group tree - * aiost->mtx: Protects thread state transitions - * job->mtx: Protects completion flag only - * aiowaitgrouplk->mtx: Protects waitgroup array modifications - * - * File group management: - * RB tree (aiost_file_tree) maintains active file groups - * Groups are created ondemand when regular file jobs are distributed - * Groups are destroyed when all jobs for that fp complete - * Enables future enhancements like dynamic job appending during processing - * - * Implementation notes - * - * io_read/write currently use fallback implementations - * Buffer array (job->buf) reserved for future vectored I/O support - * File groups could be extended for list I/O (lio_listio) kernelside batching - * Range locking infrastructure planned but not yet implemented - */ - -#include -__KERNEL_RCSID(0, "$NetBSD: sys_aiosp.c,v 0.00 2025/05/18 12:00:00 ethan4984 Exp $"); - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE(MODULE_CLASS_MISC, aiosp, NULL); - -static int aiost_create(struct aiosp *, struct aiost **); -static int aiost_terminate(struct aiost *); -static void aiost_entry(void *); -static void aiost_sigsend(struct proc *, struct sigevent *); -static int aiosp_worker_extract(struct aiosp *, struct aiost **); - -static int io_write(struct aiost *, struct aio_job *); -static int io_read(struct aiost *, struct aio_job *); -static int io_sync(struct aiost *); -static int uio_construct(struct aio_job *, struct file **, - struct iovec *, struct uio *); -static int io_write_fallback(struct aio_job *); -static int io_read_fallback(struct aio_job *); - -/* - * Module interface - */ -static int -aiosp_modcmd(modcmd_t cmd, void *arg) -{ - switch (cmd) { - case MODULE_CMD_INIT: - return 0; - case MODULE_CMD_FINI: - return 0; - default: - return SET_ERROR(ENOTTY); - } -} - -/* - * Order RB with respect to fp - */ -static int -aiost_file_group_cmp(struct aiost_file_group *a, struct aiost_file_group *b) -{ - if (a == NULL || b == NULL) { - return (a == b) ? 0 : (a ? 1 : -1); - } - - uintptr_t ap = (uintptr_t)a->fp; - uintptr_t bp = (uintptr_t)b->fp; - - return (ap < bp) ? -1 : (ap > bp) ? 1 : 0; -} - -RB_HEAD(aiost_file_tree, aiost_file_group); -RB_PROTOTYPE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp); -RB_GENERATE(aiost_file_tree, aiost_file_group, tree, aiost_file_group_cmp); - -/* - * Group jobs by file descriptor and distribute to service threads. - * Regular files are coalesced per-fp, others get individual threads. 
- * Must be called with jobs queued in sp->jobs - */ -int -aiosp_distribute_jobs(struct aiosp *sp) -{ - struct aio_job *job; - struct file *fp; - int error = 0; - - mutex_enter(&sp->mtx); - if (!sp->jobs_pending) { - mutex_exit(&sp->mtx); - return 0; - } - - struct aio_job *tmp; - TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { - fp = fd_getfile2(job->p, job->aiocbp.aio_fildes); - if (fp == NULL) { - mutex_exit(&sp->mtx); - error = SET_ERROR(EBADF); - return error; - } - - struct aiost_file_group *fg = NULL; - struct aiost *aiost = NULL; - - if (fp->f_vnode && fp->f_vnode->v_type == VREG) { - struct aiost_file_group find = { 0 }; - find.fp = fp; - fg = RB_FIND(aiost_file_tree, sp->fg_root, &find); - - if (fg == NULL) { - fg = kmem_zalloc(sizeof(*fg), KM_SLEEP); - fg->fp = fp; - fg->vp = fp->f_vnode; - fg->queue_size = 0; - TAILQ_INIT(&fg->queue); - - error = aiosp_worker_extract(sp, &aiost); - if (error) { - kmem_free(fg, sizeof(*fg)); - closef(fp); - mutex_exit(&sp->mtx); - return error; - } - - RB_INSERT(aiost_file_tree, sp->fg_root, fg); - fg->aiost = aiost; - - aiost->fg = fg; - aiost->job = NULL; - } else { - /* - * release fp as it already exists within fg - */ - closef(fp); - aiost = fg->aiost; - } - } else { - error = aiosp_worker_extract(sp, &aiost); - if (error) { - closef(fp); - mutex_exit(&sp->mtx); - return error; - } - - aiost->fg = NULL; - aiost->job = job; - } - - /* - * Move from sp->jobs to fg->jobs - */ - TAILQ_REMOVE(&sp->jobs, job, list); - sp->jobs_pending--; - - if (fg) { - TAILQ_INSERT_TAIL(&fg->queue, job, list); - fg->queue_size++; - } - - mutex_enter(&aiost->mtx); - aiost->freelist = false; - aiost->state = AIOST_STATE_OPERATION; - mutex_exit(&aiost->mtx); - cv_signal(&aiost->service_cv); - } - - mutex_exit(&sp->mtx); - - return error; -} - -/* - * Wait for specified AIO operations to complete - * Create a waitgroup to monitor the specified aiocb list. 
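For orientation, a minimal userspace sketch of the request flow that aiosp_suspend() below ultimately services; the file path is a placeholder and error handling is elided:

	#include <aio.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		static char buf[64];
		struct aiocb cb;
		const struct aiocb *list[1] = { &cb };
		int fd = open("/etc/motd", O_RDONLY);

		memset(&cb, 0, sizeof(cb));
		cb.aio_fildes = fd;
		cb.aio_buf = buf;
		cb.aio_nbytes = sizeof(buf);

		aio_read(&cb);			/* enqueue + distribute */
		aio_suspend(list, 1, NULL);	/* blocks here; AIOSP_SUSPEND_ALL */
		if (aio_error(&cb) == 0)
			printf("read %zd bytes\n", aio_return(&cb));
		close(fd);
		return 0;
	}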
- * Returns when timeout expires or completion criteria met - * - * AIOSP_SUSPEND_ANY return when any job completes - * AIOSP_SUSPEND_ALL return when all jobs complete - */ -int -aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, - struct timespec *ts, int flags) -{ - struct aio_job *job; - int error = 0; - int timo; - size_t target = 0; - size_t monitor = 0; - - if (ts) { - timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); - if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0) { - timo = 1; - } - - if (timo <= 0) { - error = SET_ERROR(EAGAIN); - return error; - } - } else { - timo = 0; - } - - struct aiowaitgroup *wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); - aiowaitgroup_init(wg); - - mutex_enter(&wg->mtx); - for (int i = 0; i < nent; i++) { - if (aiocbp_list[i] == NULL) { - continue; - } - - struct aiocbp *aiocbp = NULL; - error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_list[i]); - if (error) { - goto done; - } - if (aiocbp == NULL) { - continue; - } - - job = aiocbp->job; - monitor++; - - mutex_enter(&job->mtx); - if (job->completed) { - wg->completed++; - wg->total++; - } else { - aiowaitgroup_join(wg, &job->lk); - } - mutex_exit(&job->mtx); - } - - if (!monitor) { - goto done; - } - - if (flags & AIOSP_SUSPEND_ANY) { - target = 1; - } else if (flags & AIOSP_SUSPEND_ALL) { - target = monitor; - } - - for (; wg->completed < target;) { - error = aiowaitgroup_wait(wg, timo); - if (error) { - goto done; - } - } - -done: - wg->active = false; - wg->refcnt--; - - if (wg->refcnt == 0) { - mutex_exit(&wg->mtx); - aiowaitgroup_fini(wg); - } else { - mutex_exit(&wg->mtx); - } - - return error; -} - -/* - * Initializes a servicing pool. - */ -int -aiosp_initialize(struct aiosp *sp) -{ - mutex_init(&sp->mtx, MUTEX_DEFAULT, IPL_NONE); - TAILQ_INIT(&sp->freelist); - TAILQ_INIT(&sp->active); - TAILQ_INIT(&sp->jobs); - sp->fg_root = kmem_zalloc(sizeof(*sp->fg_root), KM_SLEEP); - RB_INIT(sp->fg_root); - - return 0; -} - -/* - * Extract an available worker thread from pool or create new one - */ -static int -aiosp_worker_extract(struct aiosp *sp, struct aiost **aiost) -{ - int error; - - if (sp->nthreads_free == 0) { - error = aiost_create(sp, aiost); - if (error) { - return error; - } - } else { - *aiost = TAILQ_LAST(&sp->freelist, aiost_list); - } - - TAILQ_REMOVE(&sp->freelist, *aiost, list); - sp->nthreads_free--; - TAILQ_INSERT_TAIL(&sp->active, *aiost, list); - sp->nthreads_active++; - - return 0; -} - - -/* - * Each process keeps track of all the service threads instantiated to service - * an asynchronous operation by the process. When a process is terminated we - * must also terminate all of its active and pending asynchronous operation. - */ -int -aiosp_destroy(struct aiosp *sp) -{ - struct aiost *st; - struct aiost *tmp; - int error = 0; - - mutex_enter(&sp->mtx); - - /* - * Terminate and destroy every service thread both free and active. 
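A note on the timeout conversion in aiosp_suspend() above, restated as a sketch with example numbers (HZ = 100 assumed):

	/*
	 * mstohz() truncates to whole ticks: 1.5 s -> mstohz(1500) = 150.
	 * A nonzero sub-tick timeout (say tv_nsec = 500) truncates to 0,
	 * which cv_timedwait_sig() would treat as "wait forever", so it
	 * is bumped to 1 tick; a timeout still <= 0 after conversion
	 * fails early with EAGAIN.
	 */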
- */ - TAILQ_FOREACH_SAFE(st, &sp->freelist, list, tmp) { - error = aiost_terminate(st); - if (error) { - mutex_exit(&sp->mtx); - return error; - } - - kmem_free(st, sizeof(*st)); - } - - TAILQ_FOREACH_SAFE(st, &sp->active, list, tmp) { - error = aiost_terminate(st); - if (error) { - mutex_exit(&sp->mtx); - return error; - } - - kmem_free(st, sizeof(*st)); - } - - mutex_exit(&sp->mtx); - mutex_destroy(&sp->mtx); - - return 0; -} - - -/* - * Enqueue a job for processing by the process's servicing pool - */ -int -aiosp_enqueue_job(struct aiosp *aiosp, struct aio_job *job) -{ - mutex_enter(&aiosp->mtx); - - TAILQ_INSERT_TAIL(&aiosp->jobs, job, list); - aiosp->jobs_pending++; - - mutex_exit(&aiosp->mtx); - - return 0; -} - -/* - * Create and initialise a new servicing thread and append it to the freelist. - */ -static int -aiost_create(struct aiosp *sp, struct aiost **ret) -{ - struct proc *p = curlwp->l_proc; - struct aiost *st; - - st = kmem_zalloc(sizeof(*st), KM_SLEEP); - - mutex_init(&st->mtx, MUTEX_DEFAULT, IPL_NONE); - cv_init(&st->service_cv, "aioservice"); - - st->job = NULL; - st->state = AIOST_STATE_NONE; - st->aiosp = sp; - st->freelist = true; - - TAILQ_INSERT_TAIL(&sp->freelist, st, list); - sp->nthreads_free++; - sp->nthreads_total++; - - int error = kthread_create(PRI_USER, 0, NULL, aiost_entry, - st, &st->lwp, "aio_%d_%ld", p->p_pid, sp->nthreads_total); - if (error) { - return error; - } - - if (ret) { - *ret = st; - } - - return 0; -} - -/* - * Process single job without coalescing. - */ -static void -aiost_process_singleton (struct aiost *st) -{ - struct aio_job *job; - - job = st->job; - KASSERT(job != NULL); - if (job->aio_op & AIO_READ) { - io_read_fallback(job); - } else if (job->aio_op & AIO_WRITE) { - io_write_fallback(job); - } else if (job->aio_op & AIO_SYNC) { - io_sync(st); - } else { - panic("aio_process: invalid operation code\n"); - } - - mutex_enter(&job->mtx); - aiowaitgrouplk_flush(&job->lk); - job->completed = true; - mutex_exit(&job->mtx); - - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); -} - -/* - * Process all jobs in a file group. - */ -static void -aiost_process_fg (struct aiost *st) -{ - struct aiosp *sp = st->aiosp; - struct aiost_file_group *fg = st->fg; - struct aio_job *job; - - struct aio_job *tmp; - TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { - if (job->aio_op & AIO_READ) { - io_read(st, job); - } else if (job->aio_op & AIO_WRITE) { - io_write(st, job); - } else if (job->aio_op & AIO_SYNC) { - io_sync(st); - } else { - panic("aio_process: invalid operation code\n"); - } - - mutex_enter(&job->mtx); - job->completed = true; - mutex_exit(&job->mtx); - - aiowaitgrouplk_flush(&job->lk); - - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); - } - - mutex_enter(&sp->mtx); - RB_REMOVE(aiost_file_tree, sp->fg_root, fg); - closef(fg->fp); - kmem_free(fg, sizeof(*fg)); - mutex_exit(&sp->mtx); -} - -/* - * Service thread entry point. Processes assigned jobs until termination. - * Handles both singleton jobs and file-grouped job batches. - */ -static void -aiost_entry(void *arg) -{ - struct aiost *st = arg; - struct aiosp *sp = st->aiosp; - int error; - - /* - * We want to handle abrupt process terminations effectively. We use - * st->exit to indicate that the thread must exit. 
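The loop below amounts to a small state machine; a sketch, assuming the AIOST_STATE_* constants from sys/sys/aio.h:

	/*
	 *           cv_signal(&st->service_cv)
	 *   NONE ------------------------------> OPERATION
	 *     ^                                      |
	 *     |     job or file group processed      |
	 *     +--------------------------------------+
	 *
	 * aiost_terminate() instead sets AIOST_STATE_TERMINATE and
	 * signals service_cv; the worker breaks out of its loop, removes
	 * itself from whichever list it is on, and calls kthread_exit().
	 */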
When a thread is - * terminated aiost_terminate(st) unblocks those sleeping on - * st->service_cv - */ - mutex_enter(&st->mtx); - for(;;) { - for (; st->state == AIOST_STATE_NONE;) { - error = cv_wait_sig(&st->service_cv, &st->mtx); - if (error) { - /* - * Thread was interrupt. Check for pending exit - * or suspension - */ - mutex_exit(&st->mtx); - lwp_userret(curlwp); - mutex_enter(&st->mtx); - } - } - - if (st->state == AIOST_STATE_TERMINATE) { - break; - } - - if (st->state != AIOST_STATE_OPERATION) { - panic("aio_process: invalid aiost state {%x}\n", - st->state); - } - - if (st->fg) { - mutex_exit(&st->mtx); - aiost_process_fg(st); - mutex_enter(&st->mtx); - } else { - mutex_exit(&st->mtx); - aiost_process_singleton(st); - mutex_enter(&st->mtx); - } - - st->state = AIOST_STATE_NONE; - st->job = NULL; - st->fg = NULL; - - /* - * Remove st from list of active service threads, append to - * freelist, dance around locks, then iterate loop and block on - * st->service_cv - */ - mutex_exit(&st->mtx); - mutex_enter(&sp->mtx); - mutex_enter(&st->mtx); - - st->freelist = true; - - TAILQ_REMOVE(&sp->active, st, list); - sp->nthreads_active--; - - TAILQ_INSERT_TAIL(&sp->freelist, st, list); - sp->nthreads_free++; - - mutex_exit(&sp->mtx); - } - - mutex_exit(&st->mtx); - mutex_enter(&sp->mtx); - - if (st->freelist) { - TAILQ_REMOVE(&sp->freelist, st, list); - sp->nthreads_free--; - } else { - TAILQ_REMOVE(&sp->active, st, list); - sp->nthreads_active--; - } - sp->nthreads_total--; - - mutex_exit(&sp->mtx); - kthread_exit(0); -} - -/* - * send AIO signal. - */ -static void -aiost_sigsend(struct proc *p, struct sigevent *sig) -{ - ksiginfo_t ksi; - - if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE) - return; - - KSI_INIT(&ksi); - ksi.ksi_signo = sig->sigev_signo; - ksi.ksi_code = SI_ASYNCIO; - ksi.ksi_value = sig->sigev_value; - - mutex_enter(&proc_lock); - kpsignal(p, &ksi, NULL); - mutex_exit(&proc_lock); -} - -/* - * Process write operation for non-blocking jobs. - */ -static int -io_write(struct aiost *aiost, struct aio_job *job) -{ - return io_write_fallback(job); -} - -/* - * Process read operation for non-blocking jobs. - */ -static int -io_read(struct aiost *aiost, struct aio_job *job) -{ - return io_read_fallback(job); -} - -/* - * Initialize UIO structure for I/O operation. - */ -static int -uio_construct(struct aio_job *job, struct file **fp, struct iovec *aiov, - struct uio *auio) -{ - struct aiocb *aiocbp = &job->aiocbp; - int fd = aiocbp->aio_fildes; - int error = 0; - - if (aiocbp->aio_nbytes > SSIZE_MAX) { - error = SET_ERROR(EINVAL); - return error; - } - - *fp = fd_getfile2(job->p, fd); - if (*fp == NULL) { - error = SET_ERROR(EBADF); - return error; - } - - aiov->iov_base = aiocbp->aio_buf; - aiov->iov_len = aiocbp->aio_nbytes; - auio->uio_iov = aiov; - auio->uio_iovcnt = 1; - auio->uio_resid = aiocbp->aio_nbytes; - auio->uio_offset = aiocbp->aio_offset; - auio->uio_vmspace = job->p->p_vmspace; - - return 0; -} - -/* - * Perform synchronous write via file operations. 
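uio_construct() above maps one aiocb onto a single-segment uio; the relationship, restated as a sketch:

	/*
	 * aiov.iov_base    = aiocbp->aio_buf;     user buffer
	 * aiov.iov_len     = aiocbp->aio_nbytes;  requested length
	 * auio.uio_offset  = aiocbp->aio_offset;  file position
	 * auio.uio_vmspace = job->p->p_vmspace;   address space of the
	 *                                         submitting process, not
	 *                                         of the service thread
	 *
	 * After fo_read/fo_write consumes auio, the transferred byte
	 * count (requested nbytes minus the remaining uio_resid) becomes
	 * _retval.
	 */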
- */ -static int -io_write_fallback(struct aio_job *job) -{ - struct file *fp; - struct iovec aiov; - struct uio auio; - struct aiocb *aiocbp; - int error; - - error = uio_construct(job, &fp, &aiov, &auio); - if (error) { - if (fp) { - closef(fp); - } - - goto done; - } - - /* - * Perform write - */ - aiocbp = &job->aiocbp; - KASSERT(job->aio_op & AIO_WRITE); - - if ((fp->f_flag & FWRITE) == 0) { - closef(fp); - error = SET_ERROR(EBADF); - goto done; - } - auio.uio_rw = UIO_WRITE; - error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, - &auio, fp->f_cred, FOF_UPDATE_OFFSET); - - closef(fp); - - /* - * Store the result value - */ - job->aiocbp.aio_nbytes -= auio.uio_resid; - job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; -done: - job->aiocbp._errno = error; - job->aiocbp._state = JOB_DONE; - - return 0; -} - -/* - * Perform synchronous read via file operations. - */ -static int -io_read_fallback(struct aio_job *job) -{ - struct file *fp; - struct iovec aiov; - struct uio auio; - struct aiocb *aiocbp; - int error; - - error = uio_construct(job, &fp, &aiov, &auio); - if (error) { - if (fp) { - closef(fp); - } - goto done; - } - - /* - * Perform read - */ - aiocbp = &job->aiocbp; - KASSERT((job->aio_op & AIO_WRITE) == 0); - - if ((fp->f_flag & FREAD) == 0) { - closef(fp); - error = SET_ERROR(EBADF); - goto done; - } - auio.uio_rw = UIO_READ; - error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, - &auio, fp->f_cred, FOF_UPDATE_OFFSET); - - closef(fp); - - /* - * Store the result value - */ - job->aiocbp.aio_nbytes -= auio.uio_resid; - job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; -done: - job->aiocbp._errno = error; - job->aiocbp._state = JOB_DONE; - - return 0; -} - -/* - * Flush file data to stable storage. - */ -static int -io_sync(struct aiost *aiost) -{ - struct aio_job *job = aiost->job; - struct aiocb *aiocbp = &job->aiocbp; - struct file *fp; - int fd = aiocbp->aio_fildes; - int error = 0; - - /* - * Perform a file sync operation - */ - struct vnode *vp; - - if ((error = fd_getvnode(fd, &fp)) != 0) { - goto done; - } - - if ((fp->f_flag & FWRITE) == 0) { - fd_putfile(fd); - error = SET_ERROR(EBADF); - goto done; - } - - vp = fp->f_vnode; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - if (job->aio_op & AIO_DSYNC) { - error = VOP_FSYNC(vp, fp->f_cred, - FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); - } else if (job->aio_op & AIO_SYNC) { - error = VOP_FSYNC(vp, fp->f_cred, - FSYNC_WAIT, 0, 0); - } - VOP_UNLOCK(vp); - fd_putfile(fd); - - /* - * Store the result value - */ - job->aiocbp._retval = (error == 0) ? 0 : -1; -done: - job->aiocbp._errno = error; - job->aiocbp._state = JOB_DONE; - - copyout(&job->aiocbp, job->aiocb_uptr, - sizeof(struct aiocb)); - - return 0; -} - -/* - * Destroy a servicing thread. Set st->exit high such that when we unblock the - * thread blocking on st->service_cv it will invoke an exit routine within - * aiost_entry. - */ -static int -aiost_terminate(struct aiost *st) -{ - int error = 0; - - mutex_enter(&st->mtx); - - st->state = AIOST_STATE_TERMINATE; - - mutex_exit(&st->mtx); - - cv_signal(&st->service_cv); - kthread_join(st->lwp); - - cv_destroy(&st->service_cv); - mutex_destroy(&st->mtx); - - return error; -} - -/* - * Ensure that the same job can not be enqueued twice. 
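The check below matters at the syscall boundary: submitting an aiocb that is already in flight must fail. A userspace fragment of the behavior this preserves, assuming fd and buf are already set up (the EINVAL value is what this implementation returns; POSIX leaves double submission undefined):

	struct aiocb cb = { .aio_fildes = fd,
	    .aio_buf = buf, .aio_nbytes = sizeof(buf) };

	aio_read(&cb);			/* accepted; job now in flight */
	if (aio_read(&cb) == -1)	/* same aiocb resubmitted */
		perror("aio_read");	/* errno == EINVAL here */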
- */ -int -aiosp_validate_conflicts(struct aiosp *aiosp, const void *uptr) -{ - struct aiost *st; - - mutex_enter(&aiosp->mtx); - - /* check active threads */ - TAILQ_FOREACH(st, &aiosp->active, list) { - KASSERT(st->job); - if (st->job->aiocb_uptr == uptr) { - mutex_exit(&aiosp->mtx); - return EINVAL; - } - } - - /* no need to check freelist threads as they have no jobs */ - - mutex_exit(&aiosp->mtx); - return 0; -} - -/* - * - */ -int aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval) -{ - struct aiocbp *aiocbp = NULL; - struct aio_job *job; - int error; - - error = aiocbp_lookup(aiosp, &aiocbp, uptr); - if (error) { - return error; - } - - job = aiocbp->job; - if (job->aiocbp._state == JOB_NONE) { - return SET_ERROR(EINVAL); - } - - *retval = job->aiocbp._errno; - - return error; -} - -/* - * - */ -int aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) -{ - struct aiocbp *aiocbp = NULL; - struct aio_job *job; - int error; - - error = aiocbp_lookup(aiosp, &aiocbp, uptr); - if (error) { - return error; - } - job = aiocbp->job; - - if (job->aiocbp._errno == EINPROGRESS || job->aiocbp._state != JOB_DONE) { - return SET_ERROR(EINVAL); - } - - *retval = job->aiocbp._retval; - - job->aiocbp._errno = 0; - job->aiocbp._retval = -1; - job->aiocbp._state = JOB_NONE; - - return 0; -} - -/* - * Hash function for aiocb user pointers. - */ -static inline u_int -aiocbp_hash(const void *uptr) -{ - return hash32_buf(&uptr, sizeof(uptr), HASH32_BUF_INIT); -} - -/* - * Find aiocb entry by user pointer. - */ -int -aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) -{ - struct aiocbp *aiocbp; - u_int hash; - - hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; - - mutex_enter(&aiosp->aio_hash_mtx); - TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { - if (aiocbp->uptr == uptr) { - *aiocbpp = aiocbp; - mutex_exit(&aiosp->aio_hash_mtx); - return 0; - } - } - mutex_exit(&aiosp->aio_hash_mtx); - - return ENOENT; -} - -/* - * Remove aiocb entry from hash table. - */ -int -aiocbp_remove(struct aiosp *aiosp, const void *uptr) -{ - struct aiocbp *aiocbp; - u_int hash; - - hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; - - struct aiocbp *tmp; - mutex_enter(&aiosp->aio_hash_mtx); - TAILQ_FOREACH_SAFE(aiocbp, &aiosp->aio_hash[hash], list, tmp) { - if (aiocbp->uptr == uptr) { - TAILQ_REMOVE(&aiosp->aio_hash[hash], aiocbp, list); - mutex_exit(&aiosp->aio_hash_mtx); - return 0; - } - } - mutex_exit(&aiosp->aio_hash_mtx); - - return ENOENT; -} - -/* - * Insert aiocb entry into hash table. - */ -int -aiocbp_insert(struct aiosp *aiosp, struct aiocbp *aiocbp) -{ - struct aiocbp *found; - const void *uptr; - u_int hash; - - uptr = aiocbp->uptr; - hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; - - mutex_enter(&aiosp->aio_hash_mtx); - TAILQ_FOREACH(found, &aiosp->aio_hash[hash], list) { - if (found->uptr == uptr) { - found->job = aiocbp->job; - mutex_exit(&aiosp->aio_hash_mtx); - return EEXIST; - } - } - - TAILQ_INSERT_HEAD(&aiosp->aio_hash[hash], aiocbp, list); - mutex_exit(&aiosp->aio_hash_mtx); - - return 0; -} - -/* - * Initialize aiocb hash table. 
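Bucket selection relies on the power-of-two requirement enforced below; a sketch of the index arithmetic:

	/*
	 * hashsize must be a power of two so that the mask form
	 *
	 *	bucket = aiocbp_hash(uptr) & (hashsize - 1);
	 *
	 * equals aiocbp_hash(uptr) % hashsize. With hashsize = 256 (the
	 * value aio_procinit() passes), mask = 0xff and each user aiocb
	 * pointer hashes onto one of 256 chained buckets.
	 */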
- */ -int -aiocbp_init(struct aiosp *aiosp, u_int hashsize) -{ - if (!powerof2(hashsize)) { - return EINVAL; - } - - aiosp->aio_hash = kmem_zalloc(hashsize * sizeof(*aiosp->aio_hash), - KM_SLEEP); - - aiosp->aio_hash_mask = hashsize - 1; - aiosp->aio_hash_size = hashsize; - - mutex_init(&aiosp->aio_hash_mtx, MUTEX_DEFAULT, IPL_NONE); - - for (size_t i = 0; i < hashsize; i++) { - TAILQ_INIT(&aiosp->aio_hash[i]); - } - - return 0; -} - -/* - * Destroy aiocb hash table and free entries. - */ -void -aiocbp_destroy(struct aiosp *aiosp) -{ - if (aiosp->aio_hash == NULL) { - return; - } - - struct aiocbp *aiocbp; - - mutex_enter(&aiosp->aio_hash_mtx); - for (size_t i = 0; i < aiosp->aio_hash_size; i++) { - struct aiocbp *tmp; - TAILQ_FOREACH_SAFE(aiocbp, &aiosp->aio_hash[i], list, tmp) { - TAILQ_REMOVE(&aiosp->aio_hash[i], aiocbp, list); - kmem_free(aiocbp, sizeof(*aiocbp)); - } - } - - kmem_free(aiosp->aio_hash, - aiosp->aio_hash_size * sizeof(*aiosp->aio_hash)); - aiosp->aio_hash = NULL; - aiosp->aio_hash_mask = 0; - aiosp->aio_hash_size = 0; - mutex_exit(&aiosp->aio_hash_mtx); -} - -/* - * Initialize wait group for suspend operations. - */ -void -aiowaitgroup_init(struct aiowaitgroup *wg) -{ - wg->completed = 0; - wg->total = 0; - wg->refcnt = 1; - wg->active = true; - cv_init(&wg->done_cv, "aiodone"); - mutex_init(&wg->mtx, MUTEX_DEFAULT, IPL_NONE); -} - -/* - * Clean up wait group resources. - */ -void -aiowaitgroup_fini(struct aiowaitgroup *wg) -{ - cv_destroy(&wg->done_cv); - mutex_destroy(&wg->mtx); - kmem_free(wg, sizeof(*wg)); -} - -/* - * Block until wait group signals completion. - */ -int -aiowaitgroup_wait(struct aiowaitgroup *wg, int timo) -{ - int error; - - error = cv_timedwait_sig(&wg->done_cv, &wg->mtx, timo); - if (error) { - if (error == EWOULDBLOCK) { - error = SET_ERROR(EAGAIN); - } - return error; - } - - return 0; -} - -/* - * Initialize wait group link for job tracking. - */ -void -aiowaitgrouplk_init(struct aiowaitgrouplk *lk) -{ - mutex_init(&lk->mtx, MUTEX_DEFAULT, IPL_NONE); - lk->n = 0; - lk->s = 2; - lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); -} - -/* - * Clean up wait group link resources. - */ -void -aiowaitgrouplk_fini(struct aiowaitgrouplk *lk) -{ - mutex_destroy(&lk->mtx); - - if (lk->s) { - kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); - } -} - -/* - * Notify all wait groups of job completion. - */ -void -aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) -{ - mutex_enter(&lk->mtx); - for (int i = 0; i < lk->n; i++) { - struct aiowaitgroup *wg = lk->wgs[i]; - if (wg == NULL) { - continue; - } - - mutex_enter(&wg->mtx); - - if (wg->active) { - wg->completed++; - cv_signal(&wg->done_cv); - } - - if (--wg->refcnt == 0) { - mutex_exit(&wg->mtx); - aiowaitgroup_fini(wg); - } else { - mutex_exit(&wg->mtx); - } - } - - if (lk->n) { - kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); - - lk->n = 0; - lk->s = 2; - lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); - } - - mutex_exit(&lk->mtx); -} - -/* - * Attach wait group to jobs notification list. 
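One observation on the growth policy in aiowaitgroup_join() below: the capacity is squared rather than doubled. A sketch of the consequence:

	/*
	 * lk->s starts at 2, so new_size = lk->s * lk->s grows the array
	 * 2 -> 4 -> 16 -> 256. Growth is still geometric (amortized O(1)
	 * insertion) but much more aggressive than the conventional
	 * lk->s * 2 doubling.
	 */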
- */ -void -aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) -{ - mutex_enter(&lk->mtx); - if (lk->n == lk->s) { - size_t new_size = lk->s * lk->s; - - void **new_wgs = kmem_zalloc(new_size * - sizeof(*new_wgs), KM_SLEEP); - - memcpy(new_wgs, lk->wgs, lk->n * sizeof(*lk->wgs)); - kmem_free(lk->wgs, lk->s * sizeof(*lk->wgs)); - - lk->s = new_size; - lk->wgs = new_wgs; - } - lk->wgs[lk->n] = wg; - lk->n++; - wg->total++; - wg->refcnt++; - mutex_exit(&lk->mtx); -} diff --git a/sys/sys/aio.h b/sys/sys/aio.h index baa1941741868..8ac25a398416c 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -121,8 +121,6 @@ struct aio_job { struct proc *p; /* Originating process */ bool completed; /* Job completion status */ struct aiowaitgrouplk lk; /* List of waitgroups waiting on this job */ - struct buf **buf; /* Buffer array for vectored I/O (unused?) */ - uint nbuf; /* Number of buffers (unused?) */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; /* List I/O request (if part of lio_listio) */ }; @@ -193,12 +191,8 @@ struct lio_req { /* Structure of AIO data for process */ struct aioproc { kmutex_t aio_mtx; /* Protects the entire structure */ - kcondvar_t aio_worker_cv; /* Signals on a new job */ - kcondvar_t done_cv; /* Signals when the job is done */ struct aio_job *curjob; /* Currently processing AIO job */ unsigned int jobs_count; /* Count of the jobs */ - TAILQ_HEAD(, aio_job) jobs_queue;/* Queue of the AIO jobs */ - struct lwp *aio_worker; /* AIO worker thread */ struct aiosp aiosp; /* Per-process service pool */ }; From 343308d20a7f45b44f0389c3b3c10c115b5471cc Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Sat, 16 Aug 2025 01:45:46 -0600 Subject: [PATCH 37/53] aio_exit --- sys/kern/sys_aio.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 9bfb3a1afb5ed..7fc3971095b87 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -290,7 +290,6 @@ aio_procinit(struct proc *p) static void aio_exit(struct proc *p, void *cookie) { - //struct aio_job *a_job; struct aioproc *aio; if (cookie != NULL) @@ -298,17 +297,6 @@ aio_exit(struct proc *p, void *cookie) else if ((aio = p->p_aio) == NULL) return; - /* Free AIO queue */ - // IMPLEMENT THIS BUT AIOSP - - /*while (!TAILQ_EMPTY(&aio->jobs_queue)) { - a_job = TAILQ_FIRST(&aio->jobs_queue); - TAILQ_REMOVE(&aio->jobs_queue, a_job, list); - pool_put(&aio_job_pool, a_job); - atomic_dec_uint(&aio_jobs_count); - }*/ - - /* Destroy and free the entire AIO data structure */ aiocbp_destroy(&aio->aiosp); aiosp_destroy(&aio->aiosp); mutex_destroy(&aio->aio_mtx); @@ -794,6 +782,22 @@ aiost_entry(void *arg) mutex_exit(&sp->mtx); } + if (st->job) { + pool_put(&aio_job_pool, st->job); + atomic_dec_uint(&aio_jobs_count); + } else { + struct aiost_file_group *fg = st->fg; + KASSERT(fg); + + while (!TAILQ_EMPTY(&fg->queue)) { + struct aio_job *job = TAILQ_FIRST(&fg->queue); + TAILQ_REMOVE(&fg->queue, job, list); + pool_put(&aio_job_pool, job); + atomic_dec_uint(&aio_jobs_count); + } + } + + mutex_exit(&st->mtx); mutex_enter(&sp->mtx); From 1c4b170ef7dd4695a4da9ef0ab381c14e7ff6a26 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Thu, 21 Aug 2025 03:23:29 -0600 Subject: [PATCH 38/53] lio wait ddb sys_aio_cancel --- sys/kern/sys_aio.c | 182 +++++++++++++++++++++++++++++++++++++++++++-- sys/sys/aio.h | 5 +- 2 files changed, 179 insertions(+), 8 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 7fc3971095b87..c008e76ece1e2 100644 --- 
a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -263,7 +263,7 @@ aio_procinit(struct proc *p) error = aiocbp_init(&aio->aiosp, 256); if (error) { - aiosp_destroy(&aio->aiosp); + aiosp_destroy(&aio->aiosp, NULL); kmem_free(aio, sizeof(struct aioproc)); return error; } @@ -298,7 +298,7 @@ aio_exit(struct proc *p, void *cookie) return; aiocbp_destroy(&aio->aiosp); - aiosp_destroy(&aio->aiosp); + aiosp_destroy(&aio->aiosp, NULL); mutex_destroy(&aio->aio_mtx); kmem_free(aio, sizeof(struct aioproc)); } @@ -382,6 +382,7 @@ aiosp_distribute_jobs(struct aiosp *sp) */ TAILQ_REMOVE(&sp->jobs, job, list); sp->jobs_pending--; + job->on_queue = false; if (fg) { TAILQ_INSERT_TAIL(&fg->queue, job, list); @@ -552,11 +553,12 @@ aiosp_worker_extract(struct aiosp *sp, struct aiost **aiost) * must also terminate all of its active and pending asynchronous operation. */ int -aiosp_destroy(struct aiosp *sp) +aiosp_destroy(struct aiosp *sp, int *cn) { struct aiost *st; struct aiost *tmp; int error = 0; + int cnt = 0; mutex_enter(&sp->mtx); @@ -570,6 +572,7 @@ aiosp_destroy(struct aiosp *sp) return error; } + cnt++; kmem_free(st, sizeof(*st)); } @@ -580,9 +583,14 @@ aiosp_destroy(struct aiosp *sp) return error; } + cnt++; kmem_free(st, sizeof(*st)); } + if (cn) { + *cn = cnt; + } + mutex_exit(&sp->mtx); mutex_destroy(&sp->mtx); @@ -599,6 +607,7 @@ aiosp_enqueue_job(struct aiosp *aiosp, struct aio_job *job) TAILQ_INSERT_TAIL(&aiosp->jobs, job, list); aiosp->jobs_pending++; + job->on_queue = true; mutex_exit(&aiosp->mtx); @@ -692,11 +701,10 @@ aiost_process_fg (struct aiost *st) } mutex_enter(&job->mtx); + aiowaitgrouplk_flush(&job->lk); job->completed = true; mutex_exit(&job->mtx); - aiowaitgrouplk_flush(&job->lk); - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); } @@ -1569,6 +1577,79 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, syscallarg(struct aiocb *) aiocbp; } */ + struct proc *p = l->l_proc; + struct aioproc *aio; + struct aiocb *aiocbp_uptr; + struct filedesc *fdp = p->p_fd; + struct aiosp *aiosp; + struct aio_job *job; + unsigned int fildes; + fdtab_t *dt; + int error; + + fildes = (unsigned int)SCARG(uap, fildes); + dt = atomic_load_consume(&fdp->fd_dt); + if (fildes >= dt->dt_nfiles) + return SET_ERROR(EBADF); + if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL) + return SET_ERROR(EBADF); + + /* Check if AIO structure is initialized */ + if (p->p_aio == NULL) { + *retval = AIO_NOTCANCELED; + return 0; + } + + aio = p->p_aio; + aiocbp_uptr = (struct aiocb *)SCARG(uap, aiocbp); + aiosp = &aio->aiosp; + + mutex_enter(&aio->aio_mtx); + mutex_enter(&aiosp->mtx); + + if (aiocbp_uptr) { + struct aiocbp *aiocbp = NULL; + error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_uptr); + if (error) { + mutex_exit(&aiosp->mtx); + mutex_exit(&aio->aio_mtx); + return error; + } + if (aiocbp) { + job = aiocbp->job; + + if (job->on_queue) { + TAILQ_REMOVE(&aiosp->jobs, job, list); + job->on_queue = false; + + mutex_enter(&job->mtx); + aiowaitgrouplk_flush(&job->lk); + job->completed = true; + mutex_exit(&job->mtx); + + aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + + *retval = AIO_CANCELED; + } else { + if (job->completed) { + *retval = AIO_ALLDONE; + } else { + *retval = AIO_NOTCANCELED; + } + } + + mutex_exit(&aiosp->mtx); + mutex_exit(&aio->aio_mtx); + + return 0; + } + } + + /* Cancel all jobs associated with this file handle */ + + mutex_exit(&aiosp->mtx); + mutex_exit(&aio->aio_mtx); + return 0; } @@ -1791,7 +1872,8 @@ sys_lio_listio(struct lwp *l, const struct 
sys_lio_listio_args *uap, } if (mode == LIO_WAIT) { - // IMPLEMENT THIS + error = aiosp_suspend(&aio->aiosp, aiocbp_list, nent, + NULL, AIOSP_SUSPEND_ALL); } err: @@ -1897,5 +1979,93 @@ SYSCTL_SETUP(sysctl_aio_init, "aio sysctl") void aio_print_jobs(void (*pr)(const char *, ...)) { + struct proc *p = curlwp->l_proc; + struct aioproc *aio; + struct aiosp *sp; + struct aio_job *job; + + if (p == NULL) { + (*pr)("AIO: no current process context.\n"); + return; + } + + aio = p->p_aio; + if (aio == NULL) { + (*pr)("AIO: not initialized (pid=%d).\n", p->p_pid); + return; + } + + sp = &aio->aiosp; + + (*pr)("AIO: pid=%d\n", p->p_pid); + (*pr)("AIO: global jobs=%u, proc jobs=%u\n", aio_jobs_count, + aio->jobs_count); + (*pr)("AIO: sp{ total_threads=%zu active=%zu free=%zu pending=%zu processing=%lu hash_buckets=%zu mask=%#x }\n", + sp->nthreads_total, sp->nthreads_active, sp->nthreads_free, + sp->jobs_pending, (u_long)sp->njobs_processing, + sp->aio_hash_size, sp->aio_hash_mask); + + /* Pending queue */ + (*pr)("\nqueue (%zu pending):\n", sp->jobs_pending); + TAILQ_FOREACH(job, &sp->jobs, list) { + (*pr)(" op=%d err=%d state=%d uptr=%p completed=%d\n", + job->aio_op, job->aiocbp._errno, job->aiocbp._state, + job->aiocb_uptr, job->completed); + (*pr)(" fd=%d off=%llu buf=%p nbytes=%zu pri=%d lio=%p\n", + job->aiocbp.aio_fildes, + (unsigned long long)job->aiocbp.aio_offset, + (void *)job->aiocbp.aio_buf, + (size_t)job->aiocbp.aio_nbytes, + (int)job->pri, job->lio); + } + + /* Active service threads */ + (*pr)("\nactive threads (%zu):\n", sp->nthreads_active); + { + struct aiost *st; + TAILQ_FOREACH(st, &sp->active, list) { + (*pr)(" lwp=%p state=%d freelist=%d\n", + (void *)st->lwp, st->state, st->freelist ? 1 : 0); + + if (st->job) { + struct aio_job *j = st->job; + (*pr)(" job: op=%d err=%d state=%d uptr=%p\n", + j->aio_op, j->aiocbp._errno, j->aiocbp._state, + j->aiocb_uptr); + (*pr)(" fd=%d off=%llu buf=%p nbytes=%zu\n", + j->aiocbp.aio_fildes, + (unsigned long long)j->aiocbp.aio_offset, + (void *)j->aiocbp.aio_buf, + (size_t)j->aiocbp.aio_nbytes); + } + + if (st->fg) { + (*pr)(" file-group: vp=%p fp=%p qlen=%zu\n", + (void *)st->fg->vp, (void *)st->fg->fp, + st->fg->queue_size); + } + } + } + + /* Freelist summary */ + (*pr)("\nfree threads (%zu)\n", sp->nthreads_free); + + /* aiocbp hash maps user aiocbp to kernel job */ + (*pr)("\naiocbp hash: buckets=%zu\n", sp->aio_hash_size); + if (sp->aio_hash != NULL && sp->aio_hash_size != 0) { + size_t b; + for (b = 0; b < sp->aio_hash_size; b++) { + struct aiocbp *hc; + if (TAILQ_EMPTY(&sp->aio_hash[b])) { + continue; + } + + (*pr)(" [%zu]:", b); + TAILQ_FOREACH(hc, &sp->aio_hash[b], list) { + (*pr)(" uptr=%p job=%p", hc->uptr, (void *)hc->job); + } + (*pr)("\n"); + } + } } #endif /* defined(DDB) */ diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 8ac25a398416c..8da7e34b76255 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -120,6 +120,7 @@ struct aio_job { void *aiocb_uptr; /* User pointer for job identification */ struct proc *p; /* Originating process */ bool completed; /* Job completion status */ + bool on_queue; /* Whether or not this job is on sp->jobs */ struct aiowaitgrouplk lk; /* List of waitgroups waiting on this job */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; /* List I/O request (if part of lio_listio) */ @@ -175,6 +176,7 @@ struct aiosp { size_t jobs_pending; /* Number of pending jobs */ kmutex_t mtx; /* Protects structure */ size_t nthreads_total; /* Number of total servicing threads */ + volatile u_long njobs_processing;/* 
Number of total jobs currently being processed*/
	struct aiocbp_list *aio_hash;	/* Aiocbp hash root */
	kmutex_t aio_hash_mtx;		/* Protects the hash table */
	size_t aio_hash_size;		/* Total number of buckets */
@@ -191,7 +193,6 @@ struct lio_req {
 /* Structure of AIO data for process */
 struct aioproc {
 	kmutex_t aio_mtx;		/* Protects the entire structure */
-	struct aio_job *curjob;		/* Currently processing AIO job */
 	unsigned int jobs_count;	/* Count of the jobs */
 	struct aiosp aiosp;		/* Per-process service pool */
 };
@@ -206,7 +207,7 @@ void aio_print_jobs(void (*)(const char *, ...) __printflike(1, 2));
 int aio_suspend1(struct lwp *, struct aiocb **, int, struct timespec *);
 int aiosp_initialize(struct aiosp *);
-int aiosp_destroy(struct aiosp *);
+int aiosp_destroy(struct aiosp *, int *);
 int aiosp_distribute_jobs(struct aiosp *);
 int aiosp_enqueue_job(struct aiosp *, struct aio_job *);
 int aiosp_suspend(struct aiosp *, struct aiocb **, int, struct timespec *,

From 16696324802ac4403fddc46fa070cc1f6e4a9013 Mon Sep 17 00:00:00 2001
From: ethan4984
Date: Thu, 21 Aug 2025 16:08:46 -0600
Subject: [PATCH 39/53] sys_aio_cancel: purge all jobs associated with the
 file handle

---
 sys/kern/sys_aio.c | 49 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c
index c008e76ece1e2..aca28f2b1c816 100644
--- a/sys/kern/sys_aio.c
+++ b/sys/kern/sys_aio.c
@@ -1583,7 +1583,8 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	struct filedesc *fdp = p->p_fd;
 	struct aiosp *aiosp;
 	struct aio_job *job;
-	unsigned int fildes;
+	unsigned int fildes, canceled = 0;
+	bool have_active = false;
 	fdtab_t *dt;
 	int error;
@@ -1618,6 +1619,12 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	if (aiocbp) {
 		job = aiocbp->job;
+		/*
+		 * If the job is on sp->jobs (signified by job->on_queue),
+		 * it has not yet been distributed. If it is not on the
+		 * queue, it is currently being processed.
+		 */
 		if (job->on_queue) {
 			TAILQ_REMOVE(&aiosp->jobs, job, list);
 			job->on_queue = false;
@@ -1645,7 +1652,45 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 		}
 	}
-	/* Cancel all jobs associated with this file handle */
+	/*
+	 * Cancel all queued jobs associated with this file descriptor
+	 */
+	struct aio_job *tmp;
+	TAILQ_FOREACH_SAFE(job, &aiosp->jobs, list, tmp) {
+		if (job->aiocbp.aio_fildes == (int)fildes) {
+			TAILQ_REMOVE(&aiosp->jobs, job, list);
+			job->on_queue = false;
+
+			mutex_enter(&job->mtx);
+			aiowaitgrouplk_flush(&job->lk);
+			job->completed = true;
+			mutex_exit(&job->mtx);
+
+			aiost_sigsend(job->p, &job->aiocbp.aio_sigevent);
+			canceled++;
+		}
+	}
+
+	/*
+	 * If there is a live file-group for this fp, then some requests
+	 * are active and could not be canceled.
+	 */
+	{
+		struct file *fp = dt->dt_ff[fildes]->ff_file;
+		struct aiost_file_group find = { 0 }, *fg;
+
+		find.fp = fp;
+		fg = RB_FIND(aiost_file_tree, aiosp->fg_root, &find);
+		have_active = (fg != NULL);
+	}
+
+	if (canceled > 0 && !have_active) {
+		*retval = AIO_CANCELED;
+	} else if (canceled == 0) {
+		*retval = have_active ?
+		    AIO_NOTCANCELED : AIO_ALLDONE;
+	} else {
+		*retval = AIO_NOTCANCELED;
+	}

 	mutex_exit(&aiosp->mtx);
 	mutex_exit(&aio->aio_mtx);
 	return 0;
 }

From b3cac61d4c7677d8b9855345df9f0d8b3f5ac2f6 Mon Sep 17 00:00:00 2001
From: ethan4984
Date: Sun, 24 Aug 2025 23:39:31 -0600
Subject: [PATCH 40/53] major misc bugs

---
 sys/kern/sys_aio.c | 442 +++++++++++++++++++++++++--------------------
 sys/sys/aio.h      |   3 +-
 2 files changed, 248 insertions(+), 197 deletions(-)

diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c
index aca28f2b1c816..944e1cf6dce8e 100644
--- a/sys/kern/sys_aio.c
+++ b/sys/kern/sys_aio.c
@@ -303,6 +303,35 @@ aio_exit(struct proc *p, void *cookie)
 	kmem_free(aio, sizeof(struct aioproc));
 }

+/*
+ * Remove a file group from the tree and free it. Caller holds sp->mtx.
+ */
+static inline void
+aiosp_fg_teardown_locked(struct aiosp *sp, struct aiost_file_group *fg)
+{
+	if (fg == NULL) {
+		return;
+	}
+
+	RB_REMOVE(aiost_file_tree, sp->fg_root, fg);
+	kmem_free(fg, sizeof(*fg));
+}
+
+/*
+ * Remove a file group from the tree and free it, taking sp->mtx.
+ */
+static inline void
+aiosp_fg_teardown(struct aiosp *sp, struct aiost_file_group *fg)
+{
+	if (fg == NULL) {
+		return;
+	}
+
+	mutex_enter(&sp->mtx);
+	aiosp_fg_teardown_locked(sp, fg);
+	mutex_exit(&sp->mtx);
+}

 /*
  * Group jobs by file descriptor and distribute to service threads.
  * Regular files are coalesced per-fp, others get individual threads.
@@ -311,7 +340,7 @@
 int
 aiosp_distribute_jobs(struct aiosp *sp)
 {
-	struct aio_job *job;
+	struct aio_job *job, *tmp;
 	struct file *fp;
 	int error = 0;
@@ -321,22 +350,28 @@ aiosp_distribute_jobs(struct aiosp *sp)
 		return 0;
 	}

-	struct aio_job *tmp;
 	TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) {
-		fp = fd_getfile2(job->p, job->aiocbp.aio_fildes);
+		fp = job->fp;
 		if (fp == NULL) {
-			mutex_exit(&sp->mtx);
-			error = SET_ERROR(EBADF);
-			return error;
+			mutex_enter(&job->mtx);
+			job->completed = true;
+			job->aiocbp._errno = SET_ERROR(EBADF);
+			job->aiocbp._retval = -1;
+			aiowaitgrouplk_flush(&job->lk);
+			mutex_exit(&job->mtx);
+
+			TAILQ_REMOVE(&sp->jobs, job, list);
+			sp->jobs_pending--;
+			job->on_queue = false;
+			continue;
 		}

 		struct aiost_file_group *fg = NULL;
 		struct aiost *aiost = NULL;

-		if (fp->f_vnode && fp->f_vnode->v_type == VREG) {
-			struct aiost_file_group find = { 0 };
-			find.fp = fp;
-			fg = RB_FIND(aiost_file_tree, sp->fg_root, &find);
+		if (fp->f_vnode != NULL && fp->f_vnode->v_type == VREG) {
+			struct aiost_file_group key = { .fp = fp };
+			fg = RB_FIND(aiost_file_tree, sp->fg_root, &key);

 			if (fg == NULL) {
 				fg = kmem_zalloc(sizeof(*fg), KM_SLEEP);
@@ -348,38 +383,27 @@ aiosp_distribute_jobs(struct aiosp *sp)
 				error = aiosp_worker_extract(sp, &aiost);
 				if (error) {
 					kmem_free(fg, sizeof(*fg));
-					closef(fp);
 					mutex_exit(&sp->mtx);
 					return error;
 				}
-
 				RB_INSERT(aiost_file_tree, sp->fg_root, fg);
 				fg->aiost = aiost;
-
-				aiost->fg = fg; 
+
+				aiost->fg = fg;
 				aiost->job = NULL;
 			} else {
-				/*
-				 * release fp as it already exists within fg
-				 */
-				closef(fp);
 				aiost = fg->aiost;
 			}
 		} else {
 			error = aiosp_worker_extract(sp, &aiost);
 			if (error) {
-				closef(fp);
 				mutex_exit(&sp->mtx);
 				return error;
 			}
-
-			aiost->fg = NULL; 
+			aiost->fg = NULL;
 			aiost->job = job;
 		}

-		/*
-		 * Move from sp->jobs to fg->jobs
-		 */
 		TAILQ_REMOVE(&sp->jobs, job, list);
 		sp->jobs_pending--;
 		job->on_queue = false;
@@ -397,7 +421,6 @@ aiosp_distribute_jobs(struct aiosp *sp)
 	}

 	mutex_exit(&sp->mtx);
-
 	return error;
 }
@@ -414,10 +437,8 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent,
 	struct timespec *ts, int flags)
 {
 	struct aio_job *job;
-	int error = 0;
-	int timo;
-	size_t target = 0;
-	size_t monitor = 0;
+	int error = 0, timo;
+	size_t target = 0, monitor = 0;

 	if (ts) {
 		timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000));
@@ -480,7 +501,6 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent,
 			goto done;
 		}
 	}
-
 done:
 	wg->active = false;
 	wg->refcnt--;
@@ -555,45 +575,36 @@ aiosp_worker_extract(struct aiosp *sp, struct aiost **aiost)
 int
 aiosp_destroy(struct aiosp *sp, int *cn)
 {
 	struct aiost *st;
-	struct aiost *tmp;
-	int error = 0;
-	int cnt = 0;
-
-	mutex_enter(&sp->mtx);
+	int error, cnt = 0;

-	/*
-	 * Terminate and destroy every service thread both free and active.
-	 */
-	TAILQ_FOREACH_SAFE(st, &sp->freelist, list, tmp) {
-		error = aiost_terminate(st);
-		if (error) {
-			mutex_exit(&sp->mtx);
-			return error;
-		}
+	for (;;) {
+		/* Peek one worker under sp->mtx. */
+		mutex_enter(&sp->mtx);
+		st = TAILQ_FIRST(&sp->freelist);
+		if (st == NULL)
+			st = TAILQ_FIRST(&sp->active);
+		mutex_exit(&sp->mtx);

-		cnt++;
-		kmem_free(st, sizeof(*st));
-	}
+		if (st == NULL)
+			break;

-	TAILQ_FOREACH_SAFE(st, &sp->active, list, tmp) {
 		error = aiost_terminate(st);
 		if (error) {
-			mutex_exit(&sp->mtx);
-			return error;
+			return error;
 		}
+		st->lwp = NULL;

-		cnt++;
 		kmem_free(st, sizeof(*st));
+		cnt++;
 	}

 	if (cn) {
 		*cn = cnt;
 	}

-	mutex_exit(&sp->mtx);
 	mutex_destroy(&sp->mtx);
-
 	return 0;
 }
@@ -637,7 +648,7 @@ aiost_create(struct aiosp *sp, struct aiost **ret)
 	sp->nthreads_free++;
 	sp->nthreads_total++;

-	int error = kthread_create(PRI_USER, 0, NULL, aiost_entry,
+	int error = kthread_create(PRI_USER, KTHREAD_MUSTJOIN, NULL, aiost_entry,
 	    st, &st->lwp, "aio_%d_%ld", p->p_pid, sp->nthreads_total);
 	if (error) {
 		return error;
 	}
@@ -690,6 +701,8 @@ aiost_process_fg (struct aiost *st)
 	struct aio_job *tmp;
 	TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) {
+		TAILQ_REMOVE(&fg->queue, job, list);
+
 		if (job->aio_op & AIO_READ) {
 			io_read(st, job);
 		} else if (job->aio_op & AIO_WRITE) {
@@ -708,11 +721,7 @@ aiost_process_fg (struct aiost *st)
 		aiost_sigsend(job->p, &job->aiocbp.aio_sigevent);
 	}

-	mutex_enter(&sp->mtx);
-	RB_REMOVE(aiost_file_tree, sp->fg_root, fg);
-	closef(fg->fp);
-	kmem_free(fg, sizeof(*fg));
-	mutex_exit(&sp->mtx);
+	aiosp_fg_teardown(sp, fg);
 }

 /*
@@ -766,6 +775,14 @@ aiost_entry(void *arg)
 			mutex_enter(&st->mtx);
 		}

+		/*
+		 * Check whether a termination was queued while handling
+		 * a job.
+		 */
+		if (st->state == AIOST_STATE_TERMINATE) {
+			break;
+		}
+
 		st->state = AIOST_STATE_NONE;
 		st->job = NULL;
 		st->fg = NULL;
@@ -791,18 +808,33 @@
 	}

 	if (st->job) {
+		if (st->job->fp) {
+			closef(st->job->fp);
+			st->job->fp = NULL;
+			st->job->vp = NULL;
+		}
+
 		pool_put(&aio_job_pool, st->job);
 		atomic_dec_uint(&aio_jobs_count);
-	} else {
+	} else if (st->fg) {
 		struct aiost_file_group *fg = st->fg;
-		KASSERT(fg);

 		while (!TAILQ_EMPTY(&fg->queue)) {
 			struct aio_job *job = TAILQ_FIRST(&fg->queue);
 			TAILQ_REMOVE(&fg->queue, job, list);
+
+			if (job->fp) {
+				closef(job->fp);
+				job->fp = NULL;
+				job->vp = NULL;
+			}
+
 			pool_put(&aio_job_pool, job);
 			atomic_dec_uint(&aio_jobs_count);
 		}
+
+		aiosp_fg_teardown(sp, fg);
+		st->fg = NULL;
 	}

@@ -869,22 +901,17 @@ uio_construct(struct aio_job *job, struct file **fp, struct iovec *aiov,
 	struct uio *auio)
 {
 	struct aiocb *aiocbp = &job->aiocbp;
-	int fd = aiocbp->aio_fildes;
-	int error = 0;

-	if (aiocbp->aio_nbytes > SSIZE_MAX) {
-		error = SET_ERROR(EINVAL);
-		return error;
-	}
-
-	*fp = fd_getfile2(job->p, fd);
-	if (*fp == NULL) {
-		error = SET_ERROR(EBADF);
-		return error;
-	}
+	if (aiocbp->aio_nbytes > SSIZE_MAX)
+
return SET_ERROR(EINVAL); + + *fp = job->fp; + if (*fp == NULL) + return SET_ERROR(EBADF); aiov->iov_base = aiocbp->aio_buf; aiov->iov_len = aiocbp->aio_nbytes; + auio->uio_iov = aiov; auio->uio_iovcnt = 1; auio->uio_resid = aiocbp->aio_nbytes; @@ -900,47 +927,34 @@ uio_construct(struct aio_job *job, struct file **fp, struct iovec *aiov, static int io_write_fallback(struct aio_job *job) { - struct file *fp; + struct file *fp = NULL; struct iovec aiov; struct uio auio; - struct aiocb *aiocbp; + struct aiocb *aiocbp = &job->aiocbp; int error; error = uio_construct(job, &fp, &aiov, &auio); if (error) { - if (fp) { - closef(fp); - } - goto done; } - /* - * Perform write - */ - aiocbp = &job->aiocbp; - KASSERT(job->aio_op & AIO_WRITE); - + /* Write using pinned file */ if ((fp->f_flag & FWRITE) == 0) { - closef(fp); error = SET_ERROR(EBADF); goto done; } + auio.uio_rw = UIO_WRITE; error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset, &auio, fp->f_cred, FOF_UPDATE_OFFSET); - closef(fp); - - /* - * Store the result value - */ + /* result */ job->aiocbp.aio_nbytes -= auio.uio_resid; job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; + done: job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; - return 0; } @@ -950,98 +964,74 @@ io_write_fallback(struct aio_job *job) static int io_read_fallback(struct aio_job *job) { - struct file *fp; + struct file *fp = NULL; struct iovec aiov; struct uio auio; - struct aiocb *aiocbp; + struct aiocb *aiocbp = &job->aiocbp; int error; error = uio_construct(job, &fp, &aiov, &auio); - if (error) { - if (fp) { - closef(fp); - } + if (error) goto done; - } - - /* - * Perform read - */ - aiocbp = &job->aiocbp; - KASSERT((job->aio_op & AIO_WRITE) == 0); + /* Read using pinned file */ if ((fp->f_flag & FREAD) == 0) { - closef(fp); error = SET_ERROR(EBADF); goto done; } + auio.uio_rw = UIO_READ; error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, &auio, fp->f_cred, FOF_UPDATE_OFFSET); - closef(fp); - - /* - * Store the result value - */ + /* result */ job->aiocbp.aio_nbytes -= auio.uio_resid; job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; + done: job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; - return 0; } /* - * Flush file data to stable storage. + * Perform sync via file operations */ static int io_sync(struct aiost *aiost) { struct aio_job *job = aiost->job; - struct aiocb *aiocbp = &job->aiocbp; - struct file *fp; - int fd = aiocbp->aio_fildes; + struct file *fp = job->fp; int error = 0; - /* - * Perform a file sync operation - */ - struct vnode *vp; - - if ((error = fd_getvnode(fd, &fp)) != 0) { + if (fp == NULL) { + error = SET_ERROR(EBADF); goto done; } if ((fp->f_flag & FWRITE) == 0) { - fd_putfile(fd); error = SET_ERROR(EBADF); goto done; } - vp = fp->f_vnode; + struct vnode *vp = fp->f_vnode; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - if (job->aio_op & AIO_DSYNC) { - error = VOP_FSYNC(vp, fp->f_cred, - FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); - } else if (job->aio_op & AIO_SYNC) { - error = VOP_FSYNC(vp, fp->f_cred, - FSYNC_WAIT, 0, 0); + if (vp->v_type == VREG) { + if (job->aio_op & AIO_DSYNC) { + error = VOP_FSYNC(vp, fp->f_cred, + FSYNC_WAIT | FSYNC_DATAONLY, 0, 0); + } else { + error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); + } } VOP_UNLOCK(vp); - fd_putfile(fd); - /* - * Store the result value - */ job->aiocbp._retval = (error == 0) ? 
0 : -1;

 done:
 	job->aiocbp._errno = error;
 	job->aiocbp._state = JOB_DONE;
-	copyout(&job->aiocbp, job->aiocb_uptr,
-	    sizeof(struct aiocb));
+	copyout(&job->aiocbp, job->aiocb_uptr, sizeof(struct aiocb));
 	return 0;
 }
@@ -1078,15 +1068,23 @@
 int
 aiosp_validate_conflicts(struct aiosp *aiosp, const void *uptr)
 {
 	struct aiost *st;
+	struct aio_job *job;

 	mutex_enter(&aiosp->mtx);

 	/* check active threads */
 	TAILQ_FOREACH(st, &aiosp->active, list) {
-		KASSERT(st->job);
-		if (st->job->aiocb_uptr == uptr) {
+		job = st->job;
+		if (job && job->aiocb_uptr == uptr) {
 			mutex_exit(&aiosp->mtx);
 			return EINVAL;
+		} else if (st->fg) {
+			TAILQ_FOREACH(job, &st->fg->queue, list) {
+				if (job->aiocb_uptr == uptr) {
+					mutex_exit(&aiosp->mtx);
+					return EINVAL;
+				}
+			}
 		}
 	}
@@ -1103,7 +1101,7 @@
 int aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval)
 {
 	struct aiocbp *aiocbp = NULL;
 	struct aio_job *job;
-	int error;
+	int error = 0;

 	error = aiocbp_lookup(aiosp, &aiocbp, uptr);
 	if (error) {
@@ -1123,28 +1121,39 @@ int aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval)

 /*
  * Get return value of completed async I/O operation
  */
-int aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval)
+int
+aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval)
 {
 	struct aiocbp *aiocbp = NULL;
 	struct aio_job *job;
 	int error;

 	error = aiocbp_lookup(aiosp, &aiocbp, uptr);
-	if (error) {
+	if (error)
 		return error;
-	}
-	job = aiocbp->job;
+	job = aiocbp->job;

-	if (job->aiocbp._errno == EINPROGRESS || job->aiocbp._state != JOB_DONE) {
+	if (job == NULL || job->aiocbp._state != JOB_DONE)
 		return SET_ERROR(EINVAL);
-	}

 	*retval = job->aiocbp._retval;

-	job->aiocbp._errno = 0;
+	/* Remove from lookup and free mapping */
+	(void)aiocbp_remove(aiosp, uptr);
+
+	/* Release job's durable file ref (submit -> return) */
+	if (job->fp) {
+		closef(job->fp);
+		job->fp = NULL;
+		job->vp = NULL;
+	}
+
+	job->aiocbp._errno = 0;
 	job->aiocbp._retval = -1;
-	job->aiocbp._state = JOB_NONE;
+	job->aiocbp._state = JOB_NONE;
+	pool_put(&aio_job_pool, job);
+	atomic_dec_uint(&aio_jobs_count);

 	return 0;
 }
@@ -1279,13 +1288,14 @@ aiocbp_destroy(struct aiosp *aiosp)
 			kmem_free(aiocbp, sizeof(*aiocbp));
 		}
 	}
+	mutex_exit(&aiosp->aio_hash_mtx);

 	kmem_free(aiosp->aio_hash,
 	    aiosp->aio_hash_size * sizeof(*aiosp->aio_hash));
 	aiosp->aio_hash = NULL;
 	aiosp->aio_hash_mask = 0;
 	aiosp->aio_hash_size = 0;
-	mutex_exit(&aiosp->aio_hash_mtx);
+	mutex_destroy(&aiosp->aio_hash_mtx);
 }

 /*
@@ -1422,6 +1432,9 @@ aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk)
 	mutex_exit(&lk->mtx);
 }

 /*
  * Enqueue the job.
  */
@@ -1476,9 +1489,8 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
 	aio = p->p_aio;
 	if (aio) {
 		error = aiosp_validate_conflicts(&aio->aiosp, aiocb_uptr);
-		if (error) {
+		if (error)
 			return SET_ERROR(error);
-		}
 	}

 	/*
@@ -1495,8 +1507,8 @@
 	 * Set the state with errno, and copy data
 	 * structure back to the user-space.
*/ - aiocb._state = JOB_WIP; - aiocb._errno = SET_ERROR(EINPROGRESS); + aiocb._state = JOB_WIP; + aiocb._errno = SET_ERROR(EINPROGRESS); aiocb._retval = -1; error = copyout(&aiocb, aiocb_uptr, sizeof(struct aiocb)); if (error) @@ -1516,6 +1528,39 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) a_job->lio = lio; mutex_init(&a_job->mtx, MUTEX_DEFAULT, IPL_NONE); aiowaitgrouplk_init(&a_job->lk); + a_job->p = p; + a_job->on_queue = false; + a_job->completed = false; + a_job->fp = NULL; + a_job->vp = NULL; + + { + const int fd = aiocb.aio_fildes; + struct file *fp = fd_getfile2(p, fd); + if (fp == NULL) { + pool_put(&aio_job_pool, a_job); + return SET_ERROR(EBADF); + } + mutex_enter(&fp->f_lock); + fp->f_count++; + mutex_exit(&fp->f_lock); + + a_job->fp = fp; + a_job->vp = fp->f_vnode; + } + + struct aiocbp *aiocbp = kmem_zalloc(sizeof(struct aiocbp), KM_SLEEP); + aiocbp->job = a_job; + aiocbp->uptr = aiocb_uptr; + error = aiocbp_insert(&aio->aiosp, aiocbp); + if (error) { + closef(a_job->fp); + a_job->fp = NULL; + a_job->vp = NULL; + kmem_free(aiocbp, sizeof(*aiocbp)); + pool_put(&aio_job_pool, a_job); + return SET_ERROR(error); + } /* * Add the job to the queue, update the counters, and @@ -1523,31 +1568,29 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) */ mutex_enter(&aio->aio_mtx); - /* Fail, if the limit was reached */ if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max || aio->jobs_count >= aio_listio_max) { atomic_dec_uint(&aio_jobs_count); mutex_exit(&aio->aio_mtx); + aiocbp_remove(&aio->aiosp, aiocb_uptr); + closef(a_job->fp); + a_job->fp = NULL; + a_job->vp = NULL; + kmem_free(aiocbp, sizeof(*aiocbp)); pool_put(&aio_job_pool, a_job); return SET_ERROR(EAGAIN); } - a_job->pri = PRI_KTHREAD; - a_job->p = curlwp->l_proc; - - struct aiocbp *aiocbp = kmem_zalloc(sizeof(struct aiocbp), KM_SLEEP); - aiocbp->job = a_job; - aiocbp->uptr = aiocb_uptr; - mutex_exit(&aio->aio_mtx); - error = aiocbp_insert(&aio->aiosp, aiocbp); - if (error) { - return SET_ERROR(error); - } - error = aiosp_enqueue_job(&aio->aiosp, a_job); if (error) { + (void)aiocbp_remove(&aio->aiosp, aiocb_uptr); + closef(a_job->fp); + a_job->fp = NULL; + a_job->vp = NULL; + kmem_free(aiocbp, sizeof(*aiocbp)); + pool_put(&aio_job_pool, a_job); return SET_ERROR(error); } @@ -1557,17 +1600,12 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) lio->refcnt++; mutex_exit(&aio->aio_mtx); - /* - * One would handle the errors only with aio_error() function. - * This way is appropriate according to POSIX. - */ return 0; } /* * Syscall functions. 
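 * Most of these are thin wrappers that enqueue a job on the
 * process' servicing pool and return immediately; completion is
 * observed with aio_error()/aio_return().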
*/
-
 int
 sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
     register_t *retval)
@@ -1583,6 +1621,8 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	struct filedesc *fdp = p->p_fd;
 	struct aiosp *aiosp;
 	struct aio_job *job;
+	struct file *fp;
+	struct aiost_file_group find = { 0 }, *fg;
 	unsigned int fildes, canceled = 0;
 	bool have_active = false;
 	fdtab_t *dt;
@@ -1594,6 +1634,7 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 		return SET_ERROR(EBADF);
 	if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL)
 		return SET_ERROR(EBADF);
+	fp = dt->dt_ff[fildes]->ff_file;
 
 	/* Check if AIO structure is initialized */
 	if (p->p_aio == NULL) {
@@ -1608,6 +1649,14 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	mutex_enter(&aio->aio_mtx);
 	mutex_enter(&aiosp->mtx);
 
+	/*
+	 * If there is a live file-group for this fp, then some requests
+	 * are active and cannot be canceled.
+	 */
+	find.fp = fp;
+	fg = RB_FIND(aiost_file_tree, aiosp->fg_root, &find);
+	have_active = (fg != NULL);
+
 	if (aiocbp_uptr) {
 		struct aiocbp *aiocbp = NULL;
 		error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_uptr);
@@ -1626,12 +1675,13 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 		 * being processed.
 		 */
 		if (job->on_queue) {
+			mutex_enter(&job->mtx);
 			TAILQ_REMOVE(&aiosp->jobs, job, list);
+			aiosp->jobs_pending--;
 			job->on_queue = false;
-
-			mutex_enter(&job->mtx);
-			aiowaitgrouplk_flush(&job->lk);
 			job->completed = true;
+			job->aiocbp._errno = ECANCELED;
+			aiowaitgrouplk_flush(&job->lk);
 			mutex_exit(&job->mtx);
 
 			aiost_sigsend(job->p, &job->aiocbp.aio_sigevent);
@@ -1644,6 +1694,8 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 			*retval = AIO_NOTCANCELED;
 		}
 	}
+
+	aiosp_fg_teardown_locked(aiosp, fg);
 	mutex_exit(&aiosp->mtx);
 	mutex_exit(&aio->aio_mtx);
@@ -1658,12 +1710,13 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	struct aio_job *tmp;
 	TAILQ_FOREACH_SAFE(job, &aiosp->jobs, list, tmp) {
 		if (job->aiocbp.aio_fildes == (int)fildes) {
+			mutex_enter(&job->mtx);
 			TAILQ_REMOVE(&aiosp->jobs, job, list);
+			aiosp->jobs_pending--;
 			job->on_queue = false;
-
-			mutex_enter(&job->mtx);
-			aiowaitgrouplk_flush(&job->lk);
 			job->completed = true;
+			job->aiocbp._errno = ECANCELED;
+			aiowaitgrouplk_flush(&job->lk);
 			mutex_exit(&job->mtx);
 
 			aiost_sigsend(job->p, &job->aiocbp.aio_sigevent);
@@ -1671,18 +1724,7 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 		}
 	}
 
-	/*
-	 * If there is a live file-group for this fp, then some requests
-	 * are active and could not be canceled.
- */ - { - struct file *fp = dt->dt_ff[fildes]->ff_file; - struct aiost_file_group find = { 0 }, *fg; - - find.fp = fp; - fg = RB_FIND(aiost_file_tree, aiosp->fg_root, &find); - have_active = (fg != NULL); - } + aiosp_fg_teardown_locked(aiosp, fg); if (canceled > 0 && !have_active) { *retval = AIO_CANCELED; @@ -1738,11 +1780,15 @@ sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap, register_t *retval) { int error; + error = aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); - struct proc *p = curlwp->l_proc; + if (error) + return error; + + struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; - error = aiosp_distribute_jobs(&aio->aiosp); - return error; + KASSERT(aio); + return aiosp_distribute_jobs(&aio->aiosp); } int @@ -1808,12 +1854,16 @@ sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap, register_t *retval) { int error; + error = aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL); - struct proc *p = curlwp->l_proc; + if (error) { + return error; + } + + struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; KASSERT(aio); - error = aiosp_distribute_jobs(&aio->aiosp); - return error; + return aiosp_distribute_jobs(&aio->aiosp); } int @@ -2056,12 +2106,12 @@ aio_print_jobs(void (*pr)(const char *, ...)) (*pr)(" op=%d err=%d state=%d uptr=%p completed=%d\n", job->aio_op, job->aiocbp._errno, job->aiocbp._state, job->aiocb_uptr, job->completed); - (*pr)(" fd=%d off=%llu buf=%p nbytes=%zu pri=%d lio=%p\n", + (*pr)(" fd=%d off=%llu buf=%p nbytes=%zu lio=%p\n", job->aiocbp.aio_fildes, (unsigned long long)job->aiocbp.aio_offset, (void *)job->aiocbp.aio_buf, (size_t)job->aiocbp.aio_nbytes, - (int)job->pri, job->lio); + job->lio); } /* Active service threads */ diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 8da7e34b76255..00270b0fb8ba9 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -116,11 +116,12 @@ struct aio_job { kmutex_t mtx; /* Protects completed flag */ int aio_op; /* Operation type (AIO_READ/WRITE/SYNC) */ struct aiocb aiocbp; /* User-visible AIO control block */ - pri_t pri; /* Scheduling priority */ void *aiocb_uptr; /* User pointer for job identification */ struct proc *p; /* Originating process */ bool completed; /* Job completion status */ bool on_queue; /* Whether or not this job is on sp->jobs */ + struct file *fp; + struct vnode *vp; struct aiowaitgrouplk lk; /* List of waitgroups waiting on this job */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; /* List I/O request (if part of lio_listio) */ From 673c0cfbf5ba77d23420fbf0d9836026c358b8c9 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Mon, 25 Aug 2025 18:33:50 -0600 Subject: [PATCH 41/53] refine comments and tweaks --- sys/kern/sys_aio.c | 95 +++++++++++++++++++++------------------------- sys/sys/aio.h | 4 +- 2 files changed, 46 insertions(+), 53 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 944e1cf6dce8e..f0747d05af492 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -304,7 +304,7 @@ aio_exit(struct proc *p, void *cookie) } /* - * + * Remove file group from tree locked */ static inline void aiosp_fg_teardown_locked(struct aiosp *sp, struct aiost_file_group *fg) @@ -318,7 +318,7 @@ aiosp_fg_teardown_locked(struct aiosp *sp, struct aiost_file_group *fg) } /* - * + * Remove file group from tree */ static inline void aiosp_fg_teardown(struct aiosp *sp, struct aiost_file_group *fg) @@ -355,14 +355,15 @@ aiosp_distribute_jobs(struct aiosp *sp) if (fp == NULL) { mutex_enter(&job->mtx); job->completed = true; - job->aiocbp._errno = SET_ERROR(EBADF); 
+ job->aiocbp._errno = SET_ERROR(EBADF); job->aiocbp._retval = -1; - aiowaitgrouplk_flush(&job->lk); mutex_exit(&job->mtx); + aiowaitgrouplk_flush(&job->lk); TAILQ_REMOVE(&sp->jobs, job, list); sp->jobs_pending--; job->on_queue = false; + continue; } @@ -389,7 +390,7 @@ aiosp_distribute_jobs(struct aiosp *sp) RB_INSERT(aiost_file_tree, sp->fg_root, fg); fg->aiost = aiost; - aiost->fg = fg; + aiost->fg = fg; aiost->job = NULL; } else { aiost = fg->aiost; @@ -400,7 +401,7 @@ aiosp_distribute_jobs(struct aiosp *sp) mutex_exit(&sp->mtx); return error; } - aiost->fg = NULL; + aiost->fg = NULL; aiost->job = job; } @@ -984,10 +985,8 @@ io_read_fallback(struct aio_job *job) error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset, &auio, fp->f_cred, FOF_UPDATE_OFFSET); - /* result */ job->aiocbp.aio_nbytes -= auio.uio_resid; job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; - done: job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; @@ -1137,23 +1136,21 @@ aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) return SET_ERROR(EINVAL); *retval = job->aiocbp._retval; + aiocbp_remove(aiosp, uptr); - /* Remove from lookup and free mapping */ - (void)aiocbp_remove(aiosp, uptr); - - /* Release job’s durable file ref (submit -> return) */ if (job->fp) { closef(job->fp); job->fp = NULL; job->vp = NULL; } - job->aiocbp._errno = 0; + job->aiocbp._errno = 0; job->aiocbp._retval = -1; - job->aiocbp._state = JOB_NONE; + job->aiocbp._state = JOB_NONE; pool_put(&aio_job_pool, job); atomic_dec_uint(&aio_jobs_count); + return 0; } @@ -1432,9 +1429,6 @@ aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) mutex_exit(&lk->mtx); } -/* - * Enqueue the job. - */ /* * Enqueue the job. */ @@ -1483,7 +1477,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) } /* - * Look for already existing job. If found - the job is in-progress. + * Look for already existing job. If found the job is in-progress. * According to POSIX this is invalid, so return the error. */ aio = p->p_aio; @@ -1494,21 +1488,21 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) } /* - * Check if AIO structure is initialized, if not - initialize it. - * In LIO case, we did that already. We will recheck this with - * the lock in aio_procinit(). + * Check if AIO structure is initialized, if not initialize it */ - if (lio == NULL && p->p_aio == NULL) - if (aio_procinit(p)) + if (p->p_aio == NULL) { + if (aio_procinit(p)) { return SET_ERROR(EAGAIN); + } + } aio = p->p_aio; /* * Set the state with errno, and copy data * structure back to the user-space. */ - aiocb._state = JOB_WIP; - aiocb._errno = SET_ERROR(EINPROGRESS); + aiocb._state = JOB_WIP; + aiocb._errno = SET_ERROR(EINPROGRESS); aiocb._retval = -1; error = copyout(&aiocb, aiocb_uptr, sizeof(struct aiocb)); if (error) @@ -1517,11 +1511,6 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) /* Allocate and initialize a new AIO job */ a_job = pool_get(&aio_job_pool, PR_WAITOK | PR_ZERO); - /* - * Set the data. - * Store the user-space pointer for searching. Since we - * are storing only per proc pointers - it is safe. 
- */ memcpy(&a_job->aiocbp, &aiocb, sizeof(struct aiocb)); a_job->aiocb_uptr = aiocb_uptr; a_job->aio_op |= op; @@ -1541,16 +1530,13 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) pool_put(&aio_job_pool, a_job); return SET_ERROR(EBADF); } - mutex_enter(&fp->f_lock); - fp->f_count++; - mutex_exit(&fp->f_lock); a_job->fp = fp; a_job->vp = fp->f_vnode; } struct aiocbp *aiocbp = kmem_zalloc(sizeof(struct aiocbp), KM_SLEEP); - aiocbp->job = a_job; + aiocbp->job = a_job; aiocbp->uptr = aiocb_uptr; error = aiocbp_insert(&aio->aiosp, aiocbp); if (error) { @@ -1567,17 +1553,20 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) * notify the AIO worker thread to handle the job. */ mutex_enter(&aio->aio_mtx); - if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max || aio->jobs_count >= aio_listio_max) { atomic_dec_uint(&aio_jobs_count); + mutex_exit(&aio->aio_mtx); aiocbp_remove(&aio->aiosp, aiocb_uptr); + closef(a_job->fp); a_job->fp = NULL; a_job->vp = NULL; + kmem_free(aiocbp, sizeof(*aiocbp)); pool_put(&aio_job_pool, a_job); + return SET_ERROR(EAGAIN); } @@ -1585,12 +1574,15 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) error = aiosp_enqueue_job(&aio->aiosp, a_job); if (error) { - (void)aiocbp_remove(&aio->aiosp, aiocb_uptr); + aiocbp_remove(&aio->aiosp, aiocb_uptr); closef(a_job->fp); + a_job->fp = NULL; a_job->vp = NULL; + kmem_free(aiocbp, sizeof(*aiocbp)); pool_put(&aio_job_pool, a_job); + return SET_ERROR(error); } @@ -1813,18 +1805,20 @@ int sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, register_t *retval) { - /* { - syscallarg(const struct aiocb *const[]) list; - syscallarg(int) nent; - syscallarg(const struct timespec *) timeout; - } */ + struct proc *p = l->l_proc; + struct aioproc *aio = p->p_aio; struct aiocb **list; struct timespec ts; int error, nent; nent = SCARG(uap, nent); - if (nent <= 0 || nent > aio_listio_max) + if (nent <= 0 || nent > aio_listio_max) { return SET_ERROR(EAGAIN); + } + + if (aio == NULL) { + return SET_ERROR(EINVAL); + } if (SCARG(uap, timeout)) { /* Convert timespec to ticks */ @@ -1836,14 +1830,12 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, list = kmem_alloc(nent * sizeof(*list), KM_SLEEP); error = copyin(SCARG(uap, list), list, nent * sizeof(*list)); - if (error) + if (error) { goto out; + } - struct proc *p = l->l_proc; - struct aioproc *aio = p->p_aio; - KASSERT(aio); error = aiosp_suspend(&aio->aiosp, list, nent, SCARG(uap, timeout) ? 
- &ts : NULL, AIOSP_SUSPEND_ALL); + &ts : NULL, AIOSP_SUSPEND_ANY); out: kmem_free(list, nent * sizeof(*list)); return error; @@ -1891,10 +1883,12 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, if (aio_jobs_count + nent > aio_max) return SET_ERROR(EAGAIN); - /* Check if AIO structure is initialized, if not - initialize it */ - if (p->p_aio == NULL) - if (aio_procinit(p)) + /* Check if AIO structure is initialized, if not initialize it */ + if (p->p_aio == NULL) { + if (aio_procinit(p)) { return SET_ERROR(EAGAIN); + } + } aio = p->p_aio; /* Create a LIO structure */ @@ -1986,7 +1980,6 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, /* * SysCtl */ - static int sysctl_aio_listio_max(SYSCTLFN_ARGS) { diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 00270b0fb8ba9..ff8249cb8f94e 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -120,8 +120,8 @@ struct aio_job { struct proc *p; /* Originating process */ bool completed; /* Job completion status */ bool on_queue; /* Whether or not this job is on sp->jobs */ - struct file *fp; - struct vnode *vp; + struct file *fp; /* File pointer associated with the job */ + struct vnode *vp; /* Vnode pointer associated with the job */ struct aiowaitgrouplk lk; /* List of waitgroups waiting on this job */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; /* List I/O request (if part of lio_listio) */ From a38e9df19c572b72ce3455c0df65306a1dd2e26f Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Mon, 25 Aug 2025 20:08:12 -0600 Subject: [PATCH 42/53] fix double fg free --- sys/kern/sys_aio.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index f0747d05af492..72258044e4a21 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -313,7 +313,9 @@ aiosp_fg_teardown_locked(struct aiosp *sp, struct aiost_file_group *fg) return; } + printf("WHAT!\n"); RB_REMOVE(aiost_file_tree, sp->fg_root, fg); + printf("WHAT! 
I AM OUT!!!\n"); kmem_free(fg, sizeof(*fg)); } @@ -700,6 +702,8 @@ aiost_process_fg (struct aiost *st) struct aiost_file_group *fg = st->fg; struct aio_job *job; + st->fg = NULL; + struct aio_job *tmp; TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { TAILQ_REMOVE(&fg->queue, job, list); @@ -819,15 +823,16 @@ aiost_entry(void *arg) atomic_dec_uint(&aio_jobs_count); } else if (st->fg) { struct aiost_file_group *fg = st->fg; + st->fg = NULL; while (!TAILQ_EMPTY(&fg->queue)) { struct aio_job *job = TAILQ_FIRST(&fg->queue); TAILQ_REMOVE(&fg->queue, job, list); - if (st->job->fp) { - closef(st->job->fp); - st->job->fp = NULL; - st->job->vp = NULL; + if (job->fp) { + closef(job->fp); + job->fp = NULL; + job->vp = NULL; } pool_put(&aio_job_pool, job); @@ -835,7 +840,6 @@ aiost_entry(void *arg) } aiosp_fg_teardown(sp, fg); - st->fg = NULL; } @@ -1686,8 +1690,6 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, *retval = AIO_NOTCANCELED; } } - - aiosp_fg_teardown_locked(aiosp, fg); mutex_exit(&aiosp->mtx); mutex_exit(&aio->aio_mtx); @@ -1716,8 +1718,6 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, } } - aiosp_fg_teardown_locked(aiosp, fg); - if (canceled > 0 && !have_active) { *retval = AIO_CANCELED; } else if (canceled == 0) { From 58e3c8189542ad7ff51b00fc15d586591c811274 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Mon, 25 Aug 2025 20:24:53 -0600 Subject: [PATCH 43/53] hardening around aiost --- sys/kern/sys_aio.c | 54 +++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 72258044e4a21..8fb00986827f9 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -131,9 +131,9 @@ static void aiost_entry(void *); static void aiost_sigsend(struct proc *, struct sigevent *); static int aiosp_worker_extract(struct aiosp *, struct aiost **); -static int io_write(struct aiost *, struct aio_job *); -static int io_read(struct aiost *, struct aio_job *); -static int io_sync(struct aiost *); +static int io_write(struct aio_job *); +static int io_read(struct aio_job *); +static int io_sync(struct aio_job *); static int uio_construct(struct aio_job *, struct file **, struct iovec *, struct uio *); static int io_write_fallback(struct aio_job *); @@ -668,18 +668,15 @@ aiost_create(struct aiosp *sp, struct aiost **ret) * Process single job without coalescing. */ static void -aiost_process_singleton (struct aiost *st) +aiost_process_singleton (struct aio_job *job) { - struct aio_job *job; - - job = st->job; - KASSERT(job != NULL); + KASSERT(job); if (job->aio_op & AIO_READ) { io_read_fallback(job); } else if (job->aio_op & AIO_WRITE) { io_write_fallback(job); } else if (job->aio_op & AIO_SYNC) { - io_sync(st); + io_sync(job); } else { panic("aio_process: invalid operation code\n"); } @@ -696,24 +693,19 @@ aiost_process_singleton (struct aiost *st) * Process all jobs in a file group. 
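 * Jobs targeting the same struct file are coalesced into one file
 * group and drained here by a single service thread, so requests
 * against a given fp are processed in queue order instead of racing
 * on separate threads.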
*/
static void
-aiost_process_fg (struct aiost *st)
+aiost_process_fg (struct aiosp *sp, struct aiost_file_group *fg)
 {
-	struct aiosp *sp = st->aiosp;
-	struct aiost_file_group *fg = st->fg;
 	struct aio_job *job;
-
-	st->fg = NULL;
-
 	struct aio_job *tmp;
+
 	TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) {
 		TAILQ_REMOVE(&fg->queue, job, list);
 
-		if (job->aio_op & AIO_READ) {
-			io_read(st, job);
-		} else if (job->aio_op & AIO_WRITE) {
-			io_write(st, job);
-		} else if (job->aio_op & AIO_SYNC) {
-			io_sync(st);
+		if ((job->aio_op & AIO_READ) == AIO_READ) {
+			io_read(job);
+		} else if ((job->aio_op & AIO_WRITE) == AIO_WRITE) {
+			io_write(job);
+		} else if ((job->aio_op & AIO_SYNC) == AIO_SYNC) {
+			io_sync(job);
 		} else {
 			panic("aio_process: invalid operation code\n");
 		}
@@ -725,8 +717,6 @@ aiost_process_fg (struct aiost *st)
 		aiost_sigsend(job->p, &job->aiocbp.aio_sigevent);
 	}
-
-	aiosp_fg_teardown(sp, fg);
 }

 /*
@@ -771,12 +761,19 @@ aiost_entry(void *arg)
 	}

 	if (st->fg) {
+		struct aiost_file_group *fg = st->fg;
+
+		st->fg = NULL;
+		aiosp_fg_teardown(sp, fg);
+
 		mutex_exit(&st->mtx);
-		aiost_process_fg(st);
+		aiost_process_fg(sp, fg);
 		mutex_enter(&st->mtx);
 	} else {
+		struct aio_job *job = st->job;
+
 		mutex_exit(&st->mtx);
-		aiost_process_singleton(st);
+		aiost_process_singleton(job);
 		mutex_enter(&st->mtx);
 	}
@@ -884,7 +881,7 @@ aiost_sigsend(struct proc *p, struct sigevent *sig)
 * Process write operation for non-blocking jobs.
 */
 static int
-io_write(struct aiost *aiost, struct aio_job *job)
+io_write(struct aio_job *job)
 {
 	return io_write_fallback(job);
 }
@@ -893,7 +890,7 @@ io_write(struct aiost *aiost, struct aio_job *job)
 * Process read operation for non-blocking jobs.
 */
 static int
-io_read(struct aiost *aiost, struct aio_job *job)
+io_read(struct aio_job *job)
 {
 	return io_read_fallback(job);
 }
@@ -1001,9 +998,8 @@ io_read_fallback(struct aio_job *job)
 * Perform sync via file operations
 */
 static int
-io_sync(struct aiost *aiost)
+io_sync(struct aio_job *job)
 {
-	struct aio_job *job = aiost->job;
 	struct file *fp = job->fp;
 	int error = 0;

From 037be883a406dd16a8883f1ed5473e01585872f0 Mon Sep 17 00:00:00 2001
From: ethan4984
Date: Tue, 26 Aug 2025 02:57:21 -0600
Subject: [PATCH 44/53] precise refcnt handling and misc bugs

---
 sys/kern/sys_aio.c | 168 +++++++++++++++++++++++++++------------------
 sys/sys/aio.h      |   1 -
 2 files changed, 100 insertions(+), 69 deletions(-)

diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c
index 8fb00986827f9..420985744a14f 100644
--- a/sys/kern/sys_aio.c
+++ b/sys/kern/sys_aio.c
@@ -139,6 +139,11 @@ static int uio_construct(struct aio_job *, struct file **,
 static int io_write_fallback(struct aio_job *);
 static int io_read_fallback(struct aio_job *);
 
+static void aio_job_fini (struct aio_job *);
+static void aio_job_mark_complete (struct aio_job *);
+static void aio_file_hold (struct file *);
+static void aio_file_release (struct file *);
+
 static const struct syscall_package aio_syscalls[] = {
 	{ SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel },
 	{ SYS_aio_error, 0, (sy_call_t *)sys_aio_error },
@@ -303,6 +308,59 @@ aio_exit(struct proc *p, void *cookie)
 	kmem_free(aio, sizeof(struct aioproc));
 }
 
+/*
+ * Tear down a job's embedded locks and waitgroup state before it
+ * is returned to the pool.
+ */
+static void
+aio_job_fini (struct aio_job *job)
+{
+	aiowaitgrouplk_fini(&job->lk);
+	mutex_destroy(&job->mtx);
+}
+
+/*
+ * Mark a job complete: drop its file reference, wake all waiters
+ * and notify the submitting process.
+ */
+static void
+aio_job_mark_complete (struct aio_job *job)
+{
+	mutex_enter(&job->mtx);
+	job->completed = true;
+	aio_file_release(job->fp);
+
+	aiowaitgrouplk_flush(&job->lk);
+	mutex_exit(&job->mtx);
+
+	aiost_sigsend(job->p, 
&job->aiocbp.aio_sigevent); +} + +/* + * Acquire a file reference for async ops + */ +static void +aio_file_hold (struct file *fp) +{ + mutex_enter(&fp->f_lock); + fp->f_count++; + mutex_exit(&fp->f_lock); +} + +/* + * Release a file reference for async ops + */ +static void +aio_file_release (struct file *fp) +{ + mutex_enter(&fp->f_lock); + fp->f_count--; + if (!fp->f_count) { + mutex_exit(&fp->f_lock); + closef(fp); + return; + } + mutex_exit(&fp->f_lock); +} + /* * Remove file group from tree locked */ @@ -313,9 +371,7 @@ aiosp_fg_teardown_locked(struct aiosp *sp, struct aiost_file_group *fg) return; } - printf("WHAT!\n"); RB_REMOVE(aiost_file_tree, sp->fg_root, fg); - printf("WHAT! I AM OUT!!!\n"); kmem_free(fg, sizeof(*fg)); } @@ -354,20 +410,7 @@ aiosp_distribute_jobs(struct aiosp *sp) TAILQ_FOREACH_SAFE(job, &sp->jobs, list, tmp) { fp = job->fp; - if (fp == NULL) { - mutex_enter(&job->mtx); - job->completed = true; - job->aiocbp._errno = SET_ERROR(EBADF); - job->aiocbp._retval = -1; - mutex_exit(&job->mtx); - aiowaitgrouplk_flush(&job->lk); - - TAILQ_REMOVE(&sp->jobs, job, list); - sp->jobs_pending--; - job->on_queue = false; - - continue; - } + KASSERT(fp); struct aiost_file_group *fg = NULL; struct aiost *aiost = NULL; @@ -379,7 +422,6 @@ aiosp_distribute_jobs(struct aiosp *sp) if (fg == NULL) { fg = kmem_zalloc(sizeof(*fg), KM_SLEEP); fg->fp = fp; - fg->vp = fp->f_vnode; fg->queue_size = 0; TAILQ_INIT(&fg->queue); @@ -526,7 +568,7 @@ aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent, struct aioproc *aio = p->p_aio; struct aiosp *aiosp = &aio->aiosp; - return aiosp_suspend(aiosp, aiocbp_list, nent, ts, AIOSP_SUSPEND_ANY); + return aiosp_suspend(aiosp, aiocbp_list, nent, ts, AIOSP_SUSPEND_ALL); } /* @@ -578,7 +620,6 @@ aiosp_worker_extract(struct aiosp *sp, struct aiost **aiost) int aiosp_destroy(struct aiosp *sp, int *cn) { - printf("INSIDE DESTROY???\n"); struct aiost *st; int error, cnt = 0; @@ -681,12 +722,7 @@ aiost_process_singleton (struct aio_job *job) panic("aio_process: invalid operation code\n"); } - mutex_enter(&job->mtx); - aiowaitgrouplk_flush(&job->lk); - job->completed = true; - mutex_exit(&job->mtx); - - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + aio_job_mark_complete(job); } /* @@ -697,6 +733,7 @@ aiost_process_fg (struct aiosp *sp, struct aiost_file_group *fg) { struct aio_job *job; struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { TAILQ_REMOVE(&fg->queue, job, list); @@ -710,12 +747,7 @@ aiost_process_fg (struct aiosp *sp, struct aiost_file_group *fg) panic("aio_process: invalid operation code\n"); } - mutex_enter(&job->mtx); - aiowaitgrouplk_flush(&job->lk); - job->completed = true; - mutex_exit(&job->mtx); - - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + aio_job_mark_complete(job); } } @@ -762,13 +794,13 @@ aiost_entry(void *arg) if (st->fg) { struct aiost_file_group *fg = st->fg; - st->fg = NULL; - aiosp_fg_teardown(sp, fg); mutex_exit(&st->mtx); aiost_process_fg(sp, fg); mutex_enter(&st->mtx); + + aiosp_fg_teardown(sp, fg); } else { struct aio_job *job = st->job; @@ -811,11 +843,11 @@ aiost_entry(void *arg) if (st->job) { if (st->job->fp) { - closef(st->job->fp); + aio_file_release(st->job->fp); st->job->fp = NULL; - st->job->vp = NULL; } + aio_job_fini(st->job); pool_put(&aio_job_pool, st->job); atomic_dec_uint(&aio_jobs_count); } else if (st->fg) { @@ -827,11 +859,11 @@ aiost_entry(void *arg) TAILQ_REMOVE(&fg->queue, job, list); if (job->fp) { - closef(job->fp); + aio_file_release(job->fp); job->fp = 
NULL; - job->vp = NULL; } + aio_job_fini(job); pool_put(&aio_job_pool, job); atomic_dec_uint(&aio_jobs_count); } @@ -953,7 +985,6 @@ io_write_fallback(struct aio_job *job) /* result */ job->aiocbp.aio_nbytes -= auio.uio_resid; job->aiocbp._retval = (error == 0) ? job->aiocbp.aio_nbytes : -1; - done: job->aiocbp._errno = error; job->aiocbp._state = JOB_DONE; @@ -1139,15 +1170,15 @@ aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) aiocbp_remove(aiosp, uptr); if (job->fp) { - closef(job->fp); + aio_file_release(job->fp); job->fp = NULL; - job->vp = NULL; } job->aiocbp._errno = 0; job->aiocbp._retval = -1; job->aiocbp._state = JOB_NONE; + aio_job_fini(job); pool_put(&aio_job_pool, job); atomic_dec_uint(&aio_jobs_count); @@ -1183,8 +1214,8 @@ aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) } } mutex_exit(&aiosp->aio_hash_mtx); - - return ENOENT; + + return SET_ERROR(ENOENT); } /* @@ -1209,7 +1240,7 @@ aiocbp_remove(struct aiosp *aiosp, const void *uptr) } mutex_exit(&aiosp->aio_hash_mtx); - return ENOENT; + return SET_ERROR(ENOENT); } /* @@ -1465,13 +1496,17 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) /* Check the opcode, if LIO_NOP - simply ignore */ if (op == AIO_LIO) { KASSERT(lio != NULL); - if (aiocb.aio_lio_opcode == LIO_WRITE) + if (aiocb.aio_lio_opcode == LIO_WRITE) { op = AIO_WRITE; - else if (aiocb.aio_lio_opcode == LIO_READ) + } else if (aiocb.aio_lio_opcode == LIO_READ) { op = AIO_READ; - else - return (aiocb.aio_lio_opcode == LIO_NOP) ? 0 : - SET_ERROR(EINVAL); + } else { + if (aiocb.aio_lio_opcode == LIO_NOP) { + return 0; + } else { + return SET_ERROR(EINVAL); + } + } } else { KASSERT(lio == NULL); } @@ -1521,18 +1556,18 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) a_job->on_queue = false; a_job->completed = false; a_job->fp = NULL; - a_job->vp = NULL; { const int fd = aiocb.aio_fildes; struct file *fp = fd_getfile2(p, fd); if (fp == NULL) { + aio_job_fini(a_job); pool_put(&aio_job_pool, a_job); return SET_ERROR(EBADF); } - + + aio_file_hold(fp); a_job->fp = fp; - a_job->vp = fp->f_vnode; } struct aiocbp *aiocbp = kmem_zalloc(sizeof(struct aiocbp), KM_SLEEP); @@ -1540,10 +1575,10 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) aiocbp->uptr = aiocb_uptr; error = aiocbp_insert(&aio->aiosp, aiocbp); if (error) { - closef(a_job->fp); + aio_file_release(a_job->fp); a_job->fp = NULL; - a_job->vp = NULL; kmem_free(aiocbp, sizeof(*aiocbp)); + aio_job_fini(a_job); pool_put(&aio_job_pool, a_job); return SET_ERROR(error); } @@ -1559,12 +1594,12 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) mutex_exit(&aio->aio_mtx); aiocbp_remove(&aio->aiosp, aiocb_uptr); + kmem_free(aiocbp, sizeof(*aiocbp)); - closef(a_job->fp); + aio_file_release(a_job->fp); a_job->fp = NULL; - a_job->vp = NULL; - kmem_free(aiocbp, sizeof(*aiocbp)); + aio_job_fini(a_job); pool_put(&aio_job_pool, a_job); return SET_ERROR(EAGAIN); @@ -1575,12 +1610,12 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) error = aiosp_enqueue_job(&aio->aiosp, a_job); if (error) { aiocbp_remove(&aio->aiosp, aiocb_uptr); - closef(a_job->fp); + kmem_free(aiocbp, sizeof(*aiocbp)); + aio_file_release(a_job->fp); a_job->fp = NULL; - a_job->vp = NULL; - kmem_free(aiocbp, sizeof(*aiocbp)); + aio_job_fini(a_job); pool_put(&aio_job_pool, a_job); return SET_ERROR(error); @@ -1671,12 +1706,10 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, TAILQ_REMOVE(&aiosp->jobs, job, list); 
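/* off sp->jobs now, so it can no longer be handed to a service thread */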
aiosp->jobs_pending--; job->on_queue = false; - job->completed = true; job->aiocbp._errno = ECANCELED; - aiowaitgrouplk_flush(&job->lk); mutex_exit(&job->mtx); - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + aio_job_mark_complete(job); *retval = AIO_CANCELED; } else { @@ -1704,12 +1737,11 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, TAILQ_REMOVE(&aiosp->jobs, job, list); aiosp->jobs_pending--; job->on_queue = false; - job->completed = true; job->aiocbp._errno = ECANCELED; - aiowaitgrouplk_flush(&job->lk); mutex_exit(&job->mtx); - aiost_sigsend(job->p, &job->aiocbp.aio_sigevent); + aio_job_mark_complete(job); + canceled++; } } @@ -1831,7 +1863,7 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, } error = aiosp_suspend(&aio->aiosp, list, nent, SCARG(uap, timeout) ? - &ts : NULL, AIOSP_SUSPEND_ANY); + &ts : NULL, AIOSP_SUSPEND_ALL); out: kmem_free(list, nent * sizeof(*list)); return error; @@ -1868,7 +1900,7 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, struct aioproc *aio; struct aiocb **aiocbp_list; struct lio_req *lio; - int i, error, errcnt, mode, nent; + int i, error = 0, errcnt, mode, nent; mode = SCARG(uap, mode); nent = SCARG(uap, nent); @@ -1945,7 +1977,7 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, error = aiosp_distribute_jobs(&aio->aiosp); if (error) { - return error; + goto err; } mutex_enter(&aio->aio_mtx); diff --git a/sys/sys/aio.h b/sys/sys/aio.h index ff8249cb8f94e..2942667fde872 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -121,7 +121,6 @@ struct aio_job { bool completed; /* Job completion status */ bool on_queue; /* Whether or not this job is on sp->jobs */ struct file *fp; /* File pointer associated with the job */ - struct vnode *vp; /* Vnode pointer associated with the job */ struct aiowaitgrouplk lk; /* List of waitgroups waiting on this job */ TAILQ_ENTRY(aio_job) list; struct lio_req *lio; /* List I/O request (if part of lio_listio) */ From 8ab33eca045b00126162a5cd80de2fc86ccac5eb Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Tue, 26 Aug 2025 23:45:50 -0600 Subject: [PATCH 45/53] aio cancel --- sys/kern/sys_aio.c | 149 +++++++++++++++++++++++++-------------------- sys/sys/aio.h | 1 - 2 files changed, 84 insertions(+), 66 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 420985744a14f..a5401b56c1f9e 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -61,7 +61,6 @@ * Enables future enhancements like dynamic job appending during processing. 
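 * Per-job file references (aio_file_hold/aio_file_release) keep the
 * backing file pinned while requests are in flight.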
* * Implementation notes - * * io_read/io_write currently use fallback implementations */ @@ -139,10 +138,10 @@ static int uio_construct(struct aio_job *, struct file **, static int io_write_fallback(struct aio_job *); static int io_read_fallback(struct aio_job *); -static void aio_job_fini (struct aio_job *); -static void aio_job_mark_complete (struct aio_job *); -static void aio_file_hold (struct file *); -static void aio_file_release (struct file *); +static void aio_job_fini(struct aio_job *); +static void aio_job_mark_complete(struct aio_job *); +static void aio_file_hold(struct file *); +static void aio_file_release(struct file *); static const struct syscall_package aio_syscalls[] = { { SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel }, @@ -227,7 +226,7 @@ aio_init(void) error = syscall_establish(NULL, aio_syscalls); if (error != 0) - (void)aio_fini(false); + aio_fini(false); return error; } @@ -568,7 +567,7 @@ aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent, struct aioproc *aio = p->p_aio; struct aiosp *aiosp = &aio->aiosp; - return aiosp_suspend(aiosp, aiocbp_list, nent, ts, AIOSP_SUSPEND_ALL); + return aiosp_suspend(aiosp, aiocbp_list, nent, ts, AIOSP_SUSPEND_ANY); } /* @@ -736,6 +735,7 @@ aiost_process_fg (struct aiosp *sp, struct aiost_file_group *fg) TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) { TAILQ_REMOVE(&fg->queue, job, list); + fg->queue_size--; if ((job->aio_op & AIO_READ) == AIO_READ) { io_read(job); @@ -857,6 +857,7 @@ aiost_entry(void *arg) while (!TAILQ_EMPTY(&fg->queue)) { struct aio_job *job = TAILQ_FIRST(&fg->queue); TAILQ_REMOVE(&fg->queue, job, list); + fg->queue_size--; if (job->fp) { aio_file_release(job->fp); @@ -940,8 +941,9 @@ uio_construct(struct aio_job *job, struct file **fp, struct iovec *aiov, return SET_ERROR(EINVAL); *fp = job->fp; - if (*fp == NULL) + if (*fp == NULL) { return SET_ERROR(EBADF); + } aiov->iov_base = aiocbp->aio_buf; aiov->iov_len = aiocbp->aio_nbytes; @@ -1137,6 +1139,9 @@ int aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval) if (error) { return error; } + if (aiocbp == NULL) { + return SET_ERROR(ENOENT); + } job = aiocbp->job; if (job->aiocbp._state == JOB_NONE) { @@ -1159,12 +1164,17 @@ aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) int error; error = aiocbp_lookup(aiosp, &aiocbp, uptr); - if (error) + if (error) { return error; + } + if (aiocbp == NULL) { + return SET_ERROR(ENOENT); + } job = aiocbp->job; - if (job == NULL || job->aiocbp._state != JOB_DONE) + if (job == NULL || job->aiocbp._state != JOB_DONE) { return SET_ERROR(EINVAL); + } *retval = job->aiocbp._retval; aiocbp_remove(aiosp, uptr); @@ -1215,7 +1225,8 @@ aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) } mutex_exit(&aiosp->aio_hash_mtx); - return SET_ERROR(ENOENT); + *aiocbpp = NULL; + return 0; } /* @@ -1479,14 +1490,16 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) /* Get the data structure from user-space */ error = copyin(aiocb_uptr, &aiocb, sizeof(struct aiocb)); - if (error) + if (error) { return error; + } /* Check if signal is set, and validate it */ sig = &aiocb.aio_sigevent; if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG || - sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA) + sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA) { return SET_ERROR(EINVAL); + } /* Buffer and byte count */ if (((AIO_SYNC | AIO_DSYNC) & op) == 0) @@ -1518,8 +1531,9 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) 
 	aio = p->p_aio;
 	if (aio) {
 		error = aiosp_validate_conflicts(&aio->aiosp, aiocb_uptr);
-		if (error)
+		if (error) {
 			return SET_ERROR(error);
+		}
 	}
 
 	/*
@@ -1540,18 +1554,18 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
 	aiocb._errno = SET_ERROR(EINPROGRESS);
 	aiocb._retval = -1;
 	error = copyout(&aiocb, aiocb_uptr, sizeof(struct aiocb));
-	if (error)
+	if (error) {
 		return error;
+	}
 
 	/* Allocate and initialize a new AIO job */
 	a_job = pool_get(&aio_job_pool, PR_WAITOK | PR_ZERO);
@@ -1623,8 +1638,9 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
 	mutex_enter(&aio->aio_mtx);
 	aio->jobs_count++;
-	if (lio)
+	if (lio) {
 		lio->refcnt++;
+	}
 	mutex_exit(&aio->aio_mtx);
 
 	return 0;
@@ -1637,11 +1653,6 @@ int
 sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
     register_t *retval)
 {
-	/* {
-		syscallarg(int) fildes;
-		syscallarg(struct aiocb *) aiocbp;
-	} */
 	struct proc *p = l->l_proc;
 	struct aioproc *aio;
 	struct aiocb *aiocbp_uptr;
@@ -1657,10 +1668,12 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	fildes = (unsigned int)SCARG(uap, fildes);
 	dt = atomic_load_consume(&fdp->fd_dt);
-	if (fildes >= dt->dt_nfiles)
+	if (fildes >= dt->dt_nfiles) {
 		return SET_ERROR(EBADF);
-	if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL)
+	}
+	if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL) {
 		return SET_ERROR(EBADF);
+	}
 	fp = dt->dt_ff[fildes]->ff_file;
 
 	/* Check if AIO structure is initialized */
@@ -1682,7 +1695,9 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	 */
 	find.fp = fp;
 	fg = RB_FIND(aiost_file_tree, aiosp->fg_root, &find);
-	have_active = (fg != NULL);
+	if (fg) {
+		have_active = fg->queue_size ? true : false;
+	}
 
 	if (aiocbp_uptr) {
 		struct aiocbp *aiocbp = NULL;
@@ -1695,6 +1710,12 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	if (aiocbp) {
 		job = aiocbp->job;
 
+		if (job->completed) {
+			*retval = AIO_ALLDONE;
+		} else {
+			*retval = AIO_NOTCANCELED;
+		}
+
 		/*
 		 * If the job is on sp->jobs (signified by job->on_queue)
 		 * that means that it has not been distributed yet. And if
@@ -1712,12 +1733,6 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 			aio_job_mark_complete(job);
 
 			*retval = AIO_CANCELED;
-		} else {
-			if (job->completed) {
-				*retval = AIO_ALLDONE;
-			} else {
-				*retval = AIO_NOTCANCELED;
-			}
 		}
 
 		mutex_exit(&aiosp->mtx);
@@ -1725,6 +1740,13 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 		return 0;
 	}
+
+	*retval = AIO_ALLDONE;
+
+	mutex_exit(&aiosp->mtx);
+	mutex_exit(&aio->aio_mtx);
+
+	return 0;
 }
 
 /*
@@ -1746,9 +1768,9 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 		}
 	}
 
-	if (canceled > 0 && !have_active) {
+	if (canceled && !have_active) {
 		*retval = AIO_CANCELED;
-	} else if (canceled == 0) {
+	} else if (!canceled) {
 		*retval = have_active ? 
AIO_NOTCANCELED : AIO_ALLDONE; } else { *retval = AIO_NOTCANCELED; @@ -1764,14 +1786,12 @@ int sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap, register_t *retval) { - /* { - syscallarg(const struct aiocb *) aiocbp; - } */ struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; - if (aio == NULL) + if (aio == NULL) { return SET_ERROR(EINVAL); + } const void *uptr = SCARG(uap, aiocbp); return aiosp_error(&aio->aiosp, uptr, retval); @@ -1781,14 +1801,11 @@ int sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap, register_t *retval) { - /* { - syscallarg(int) op; - syscallarg(struct aiocb *) aiocbp; - } */ int op = SCARG(uap, op); - if ((op != O_DSYNC) && (op != O_SYNC)) + if ((op != O_DSYNC) && (op != O_SYNC)) { return SET_ERROR(EINVAL); + } op = O_DSYNC ? AIO_DSYNC : AIO_SYNC; @@ -1802,8 +1819,9 @@ sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap, int error; error = aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL); - if (error) + if (error) { return error; + } struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; @@ -1815,9 +1833,6 @@ int sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap, register_t *retval) { - /* { - syscallarg(struct aiocb *) aiocbp; - } */ struct proc *p = l->l_proc; struct aioproc *aio = p->p_aio; @@ -1863,7 +1878,7 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, } error = aiosp_suspend(&aio->aiosp, list, nent, SCARG(uap, timeout) ? - &ts : NULL, AIOSP_SUSPEND_ALL); + &ts : NULL, AIOSP_SUSPEND_ANY); out: kmem_free(list, nent * sizeof(*list)); return error; @@ -1890,12 +1905,6 @@ int sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, register_t *retval) { - /* { - syscallarg(int) mode; - syscallarg(struct aiocb *const[]) list; - syscallarg(int) nent; - syscallarg(struct sigevent *) sig; - } */ struct proc *p = l->l_proc; struct aioproc *aio; struct aiocb **aiocbp_list; @@ -1906,10 +1915,12 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, nent = SCARG(uap, nent); /* Non-accurate checks for the limit and invalid values */ - if (nent < 1 || nent > aio_listio_max) + if (nent < 1 || nent > aio_listio_max) { return SET_ERROR(EINVAL); - if (aio_jobs_count + nent > aio_max) + } + if (aio_jobs_count + nent > aio_max) { return SET_ERROR(EAGAIN); + } /* Check if AIO structure is initialized, if not initialize it */ if (p->p_aio == NULL) { @@ -1971,8 +1982,9 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, * According to POSIX, in such error case it may * fail with other I/O operations initiated. 
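 * We therefore keep walking the list and report a single EIO
 * through errcnt after every entry has been submitted.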
*/ - if (error) + if (error) { errcnt++; + } } error = aiosp_distribute_jobs(&aio->aiosp); @@ -1982,7 +1994,7 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, mutex_enter(&aio->aio_mtx); - /* Return an error, if any */ + /* Return an error if any */ if (errcnt) { error = SET_ERROR(EIO); goto err; @@ -1994,8 +2006,9 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, } err: - if (--lio->refcnt != 0) + if (--lio->refcnt != 0) { lio = NULL; + } mutex_exit(&aio->aio_mtx); if (lio != NULL) { aiost_sigsend(p, &lio->sig); @@ -2019,11 +2032,13 @@ sysctl_aio_listio_max(SYSCTLFN_ARGS) newsize = aio_listio_max; error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if (error || newp == NULL) + if (error || newp == NULL) { return error; + } - if (newsize < 1 || newsize > aio_max) + if (newsize < 1 || newsize > aio_max) { return SET_ERROR(EINVAL); + } aio_listio_max = newsize; return 0; @@ -2040,11 +2055,13 @@ sysctl_aio_max(SYSCTLFN_ARGS) newsize = aio_max; error = sysctl_lookup(SYSCTLFN_CALL(&node)); - if (error || newp == NULL) + if (error || newp == NULL) { return error; + } - if (newsize < 1 || newsize < aio_listio_max) + if (newsize < 1 || newsize < aio_listio_max) { return SET_ERROR(EINVAL); + } aio_max = newsize; return 0; @@ -2063,8 +2080,9 @@ SYSCTL_SETUP(sysctl_aio_init, "aio sysctl") NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0, CTL_KERN, CTL_CREATE, CTL_EOL); - if (rv != 0) + if (rv != 0) { return; + } rv = sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, @@ -2074,8 +2092,9 @@ SYSCTL_SETUP(sysctl_aio_init, "aio sysctl") sysctl_aio_listio_max, 0, &aio_listio_max, 0, CTL_KERN, CTL_CREATE, CTL_EOL); - if (rv != 0) + if (rv != 0) { return; + } rv = sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, @@ -2156,8 +2175,8 @@ aio_print_jobs(void (*pr)(const char *, ...)) } if (st->fg) { - (*pr)(" file-group: vp=%p fp=%p qlen=%zu\n", - (void *)st->fg->vp, (void *)st->fg->fp, + (*pr)(" file-group: fp=%p qlen=%zu\n", + (void *)st->fg->fp, st->fg->queue_size); } } diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 2942667fde872..91fb493cd9331 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -137,7 +137,6 @@ struct aiost; struct aiost_file_group { RB_ENTRY(aiost_file_group) tree; struct file *fp; - struct vnode *vp; struct aiost *aiost; TAILQ_HEAD(, aio_job) queue; size_t queue_size; From 748f71b48d0a287625469b2e31514443a7c860ea Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Wed, 27 Aug 2025 00:15:08 -0600 Subject: [PATCH 46/53] tests --- tests/lib/libc/sys/t_aio_cancel.c | 222 ++++++++++++++++++++++++ tests/lib/libc/sys/t_aio_lio.c | 262 +++++++++++++++++++++++++++++ tests/lib/libc/sys/t_aio_rw.c | 167 ++++++++++++++++++ tests/lib/libc/sys/t_aio_suspend.c | 170 +++++++++++++++++++ 4 files changed, 821 insertions(+) create mode 100644 tests/lib/libc/sys/t_aio_cancel.c create mode 100644 tests/lib/libc/sys/t_aio_lio.c create mode 100644 tests/lib/libc/sys/t_aio_rw.c create mode 100644 tests/lib/libc/sys/t_aio_suspend.c diff --git a/tests/lib/libc/sys/t_aio_cancel.c b/tests/lib/libc/sys/t_aio_cancel.c new file mode 100644 index 0000000000000..8a83781ec2fb0 --- /dev/null +++ b/tests/lib/libc/sys/t_aio_cancel.c @@ -0,0 +1,222 @@ +/* $NetBSD: t_aio_cancel.c,v 1.00 2025/08/26 00:00:00 ethan4984 Exp $ */ + +/* + * Copyright (c) 2025 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND + * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int mktemp_file(char *, size_t); +static void fill_pattern(uint8_t *, size_t, uint8_t); +static void wait_all(const struct aiocb * const [], size_t); + +static int +mktemp_file(char *path, size_t pathlen) +{ + int fd, n; + + n = snprintf(path, pathlen, "t_aio_cancel.XXXXXX"); + ATF_REQUIRE(n > 0 && (size_t)n < pathlen); + + fd = mkstemp(path); + ATF_REQUIRE(fd >= 0); + + return fd; +} + +static void +fill_pattern(uint8_t *buf, size_t len, uint8_t seed) +{ + size_t i; + + for (i = 0; i < len; i++) { + buf[i] = (uint8_t)(seed + (i & 0xff)); + } +} + +static void +wait_all(const struct aiocb * const list[], size_t nent) +{ + size_t i; + int pending; + int rv; + + for (;;) { + pending = 0; + + for (i = 0; i < nent; i++) { + int err; + + if (list[i] == NULL) { + continue; + } + + err = aio_error(list[i]); + if (err == EINPROGRESS) { + pending = 1; + } + } + + if (!pending) { + break; + } + + rv = aio_suspend(list, (int)nent, NULL); + ATF_REQUIRE_EQ_MSG(0, rv, "aio_suspend failed: %s", strerror(errno)); + } +} + +ATF_TC_WITHOUT_HEAD(cancel_active_write); +ATF_TC_BODY(cancel_active_write, tc) +{ + char path[64]; + int fd, rv, crv, err; + const size_t blksz = 0x1000; + uint8_t *wbuf; + struct aiocb cb; + const struct aiocb *list[1]; + + fd = mktemp_file(path, sizeof(path)); + + wbuf = malloc(blksz); + ATF_REQUIRE(wbuf != NULL); + fill_pattern(wbuf, blksz, 0x33); + + memset(&cb, 0, sizeof(cb)); + cb.aio_fildes = fd; + cb.aio_buf = wbuf; + cb.aio_nbytes = blksz; + cb.aio_offset = 0; + + rv = aio_write(&cb); + ATF_REQUIRE_EQ(0, rv); + + crv = aio_cancel(fd, &cb); + ATF_REQUIRE(crv == AIO_CANCELED || crv == AIO_NOTCANCELED || crv == AIO_ALLDONE); + + if (crv == AIO_CANCELED) { + do { + err = aio_error(&cb); + } while (err == EINPROGRESS); + ATF_REQUIRE_EQ(ECANCELED, err); + ATF_REQUIRE_EQ(-1, aio_return(&cb)); + } else if (crv == AIO_NOTCANCELED) { + list[0] = &cb; + wait_all(list, 1); + ATF_REQUIRE_EQ(0, aio_error(&cb)); + ATF_REQUIRE_EQ((ssize_t)blksz, aio_return(&cb)); + } else { + do { + err = aio_error(&cb); + } while (err == EINPROGRESS); + 
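		/* AIO_ALLDONE: the write already finished, expect success */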
ATF_REQUIRE_EQ(0, err); + ATF_REQUIRE_EQ((ssize_t)blksz, aio_return(&cb)); + } + + rv = close(fd); + ATF_REQUIRE_EQ(0, rv); + rv = unlink(path); + ATF_REQUIRE_EQ(0, rv); + + free(wbuf); +} + +ATF_TC_WITHOUT_HEAD(cancel_completed_request); +ATF_TC_BODY(cancel_completed_request, tc) +{ + char path[64]; + int fd, rv, crv; + const size_t blksz = 4096; + uint8_t *wbuf; + struct aiocb cb; + const struct aiocb *list[1]; + + fd = mktemp_file(path, sizeof(path)); + + wbuf = malloc(blksz); + ATF_REQUIRE(wbuf != NULL); + memset(wbuf, 0x7E, blksz); + + memset(&cb, 0, sizeof(cb)); + cb.aio_fildes = fd; + cb.aio_buf = wbuf; + cb.aio_nbytes = blksz; + cb.aio_offset = 0; + + rv = aio_write(&cb); + ATF_REQUIRE_EQ(0, rv); + + list[0] = &cb; + wait_all(list, 1); + ATF_REQUIRE_EQ(0, aio_error(&cb)); + ATF_REQUIRE_EQ((ssize_t)blksz, aio_return(&cb)); + + crv = aio_cancel(fd, &cb); + ATF_REQUIRE_EQ(AIO_ALLDONE, crv); + + rv = close(fd); + ATF_REQUIRE_EQ(0, rv); + rv = unlink(path); + ATF_REQUIRE_EQ(0, rv); + + free(wbuf); +} + +ATF_TC_WITHOUT_HEAD(cancel_invalid_fd); +ATF_TC_BODY(cancel_invalid_fd, tc) +{ + struct aiocb cb; + int crv; + + memset(&cb, 0, sizeof(cb)); + cb.aio_fildes = -1; + + errno = 0; + crv = aio_cancel(-1, &cb); + ATF_REQUIRE_EQ(-1, crv); + ATF_REQUIRE_EQ(EBADF, errno); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, cancel_active_write); + ATF_TP_ADD_TC(tp, cancel_completed_request); + ATF_TP_ADD_TC(tp, cancel_invalid_fd); + return atf_no_error(); +} diff --git a/tests/lib/libc/sys/t_aio_lio.c b/tests/lib/libc/sys/t_aio_lio.c new file mode 100644 index 0000000000000..c841c9ed3376a --- /dev/null +++ b/tests/lib/libc/sys/t_aio_lio.c @@ -0,0 +1,262 @@ +/* $NetBSD: t_aio_lio.c,v 1.00 2025/08/26 00:00:00 ethan4984 Exp $ */ + +/* + * Copyright (c) 2025 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND + * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int mktemp_file(char *, size_t); +static void fill_pattern(uint8_t *, size_t, uint8_t); +static void wait_all(const struct aiocb * const [], size_t); + +static int +mktemp_file(char *path, size_t pathlen) +{ + int fd, n; + + n = snprintf(path, pathlen, "t_aio_lio.XXXXXX"); + ATF_REQUIRE(n > 0 && (size_t)n < pathlen); + + fd = mkstemp(path); + ATF_REQUIRE(fd >= 0); + + return fd; +} + +static void +fill_pattern(uint8_t *buf, size_t len, uint8_t seed) +{ + size_t i; + + for (i = 0; i < len; i++) { + buf[i] = (uint8_t)(seed + (i & 0xff)); + } +} + +static void +wait_all(const struct aiocb * const list[], size_t nent) +{ + size_t i; + int pending, rv; + + for (;;) { + pending = 0; + + for (i = 0; i < nent; i++) { + int err; + + if (list[i] == NULL) { + continue; + } + + err = aio_error(list[i]); + if (err == EINPROGRESS) { + pending = 1; + } + } + + if (!pending) { + break; + } + + rv = aio_suspend(list, (int)nent, NULL); + ATF_REQUIRE_EQ_MSG(0, rv, "aio_suspend failed: %s", + strerror(errno)); + } +} + +ATF_TC_WITHOUT_HEAD(lio_nowait); +ATF_TC_BODY(lio_nowait, tc) +{ + char path[64]; + int fd, rv; + const size_t nreq = 8, blksz = 8192; + uint8_t *bufs[nreq]; + struct aiocb cbs[nreq]; + struct aiocb *list[nreq]; + off_t off; + size_t i; + + fd = mktemp_file(path, sizeof(path)); + + off = 0; + for (i = 0; i < nreq; i++) { + bufs[i] = malloc(blksz); + ATF_REQUIRE(bufs[i] != NULL); + + fill_pattern(bufs[i], blksz, (uint8_t)i); + + memset(&cbs[i], 0, sizeof(cbs[i])); + cbs[i].aio_fildes = fd; + cbs[i].aio_buf = bufs[i]; + cbs[i].aio_nbytes = blksz; + cbs[i].aio_offset = off; + cbs[i].aio_lio_opcode = LIO_WRITE; + + list[i] = &cbs[i]; + off += (off_t)blksz; + } + + rv = lio_listio(LIO_NOWAIT, list, (int)nreq, NULL); + ATF_REQUIRE_EQ_MSG(0, rv, "lio_listio failed: %s", + strerror(errno)); + + wait_all((const struct aiocb * const *)list, nreq); + + for (i = 0; i < nreq; i++) { + int err; + ssize_t done; + + err = aio_error(&cbs[i]); + ATF_REQUIRE_EQ(0, err); + + done = aio_return(&cbs[i]); + ATF_REQUIRE_EQ((ssize_t)blksz, done); + + free(bufs[i]); + } + + rv = close(fd); + ATF_REQUIRE_EQ(0, rv); + rv = unlink(path); + ATF_REQUIRE_EQ(0, rv); +} + +ATF_TC_WITHOUT_HEAD(lio_wait_write_then_read); +ATF_TC_BODY(lio_wait_write_then_read, tc) +{ + char path[64]; + int fd, rv; + const size_t nreq = 4, blksz = 4096; + + uint8_t *wbufs[nreq]; + struct aiocb wcbs[nreq]; + struct aiocb *wlist[nreq]; + + uint8_t *rbufs[nreq]; + struct aiocb rcbs[nreq]; + struct aiocb *rlist[nreq]; + + size_t i; + off_t off; + + fd = mktemp_file(path, sizeof(path)); + + off = 0; + for (i = 0; i < nreq; i++) { + wbufs[i] = malloc(blksz); + ATF_REQUIRE(wbufs[i] != NULL); + + fill_pattern(wbufs[i], blksz, (uint8_t)(0xA0 + i)); + + memset(&wcbs[i], 0, sizeof(wcbs[i])); + wcbs[i].aio_fildes = fd; + wcbs[i].aio_buf = wbufs[i]; + wcbs[i].aio_nbytes = blksz; + wcbs[i].aio_offset = off; + wcbs[i].aio_lio_opcode = LIO_WRITE; + + wlist[i] = &wcbs[i]; + off += (off_t)blksz; + } + + rv = lio_listio(LIO_WAIT, wlist, (int)nreq, NULL); + ATF_REQUIRE_EQ_MSG(0, rv, "lio_listio write failed: %s", + strerror(errno)); + + for (i = 0; i < nreq; i++) { + int err; + ssize_t done; + + err = aio_error(&wcbs[i]); + ATF_REQUIRE_EQ(0, err); + + done = aio_return(&wcbs[i]); + ATF_REQUIRE_EQ((ssize_t)blksz, done); + } + + for (i = 0; i < nreq; i++) { + rbufs[i] = calloc(1, blksz); + ATF_REQUIRE(rbufs[i] != NULL); + + 
memset(&rcbs[i], 0, sizeof(rcbs[i])); + rcbs[i].aio_fildes = fd; + rcbs[i].aio_buf = rbufs[i]; + rcbs[i].aio_nbytes = blksz; + rcbs[i].aio_offset = (off_t)i * (off_t)blksz; + rcbs[i].aio_lio_opcode = LIO_READ; + + rlist[i] = &rcbs[i]; + } + + rv = lio_listio(LIO_NOWAIT, rlist, (int)nreq, NULL); + ATF_REQUIRE_EQ_MSG(0, rv, "lio_listio read failed: %s", + strerror(errno)); + + wait_all((const struct aiocb * const *)rlist, nreq); + + for (i = 0; i < nreq; i++) { + int err; + ssize_t done; + + err = aio_error(&rcbs[i]); + ATF_REQUIRE_EQ(0, err); + + done = aio_return(&rcbs[i]); + ATF_REQUIRE_EQ((ssize_t)blksz, done); + + ATF_REQUIRE_EQ(0, memcmp(wbufs[i], rbufs[i], blksz)); + + free(wbufs[i]); + free(rbufs[i]); + } + + rv = close(fd); + ATF_REQUIRE_EQ(0, rv); + rv = unlink(path); + ATF_REQUIRE_EQ(0, rv); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, lio_nowait); + ATF_TP_ADD_TC(tp, lio_wait_write_then_read); + + return atf_no_error(); +} diff --git a/tests/lib/libc/sys/t_aio_rw.c b/tests/lib/libc/sys/t_aio_rw.c new file mode 100644 index 0000000000000..f2491eb254bf6 --- /dev/null +++ b/tests/lib/libc/sys/t_aio_rw.c @@ -0,0 +1,167 @@ +/* $NetBSD: t_aio_rw.c,v 1.00 2025/08/26 00:00:00 ethan4984 Exp $ */ + +/* + * Copyright (c) 2025 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND + * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int mktemp_file(char *, size_t); +static void fill_pattern(uint8_t *, size_t, uint8_t); +static void wait_all(const struct aiocb * const [], size_t); + +static int +mktemp_file (char *path, size_t pathlen) +{ + int fd, n; + + n = snprintf(path, pathlen, "t_aio_rw.XXXXXX"); + ATF_REQUIRE(n > 0 && (size_t)n < pathlen); + + fd = mkstemp(path); + ATF_REQUIRE(fd >= 0); + + return fd; +} + +static void +fill_pattern (uint8_t *buf, size_t len, uint8_t seed) +{ + size_t i; + + for (i = 0; i < len; i++) { + buf[i] = (uint8_t)(seed + (i & 0xff)); + } +} + +static void +wait_all (const struct aiocb * const list[], size_t nent) +{ + size_t i; + int pending, rv, error; + + for (;;) { + pending = 0; + + for (i = 0; i < nent; i++) { + if (list[i] == NULL) { + continue; + } + + error = aio_error(list[i]); + if (error == EINPROGRESS) { + pending = 1; + } + } + + if (!pending) { + break; + } + + rv = aio_suspend(list, (int)nent, NULL); + ATF_REQUIRE_EQ_MSG(0, rv, "aio_suspend failed: %s", + strerror(errno)); + } +} + +/* + * write_then_read_back + * Write a block then read it back asynchronously and compare. + */ +ATF_TC_WITHOUT_HEAD(write_then_read_back); +ATF_TC_BODY(write_then_read_back, tc) +{ + char path[64]; + int fd, rv; + const size_t blksz = 0x2000; + uint8_t *wbuf, *rbuf; + struct aiocb wcb, rcb; + const struct aiocb *wlist[1], *rlist[1]; + + fd = mktemp_file(path, sizeof(path)); + + wbuf = malloc(blksz); + rbuf = calloc(1, blksz); + ATF_REQUIRE(wbuf != NULL && rbuf != NULL); + + fill_pattern(wbuf, blksz, 0xA0); + + memset(&wcb, 0, sizeof(wcb)); + wcb.aio_fildes = fd; + wcb.aio_buf = wbuf; + wcb.aio_nbytes = blksz; + wcb.aio_offset = 0; + + rv = aio_write(&wcb); + ATF_REQUIRE_EQ(0, rv); + wlist[0] = &wcb; + wait_all(wlist, 1); + + ATF_REQUIRE_EQ(0, aio_error(&wcb)); + ATF_REQUIRE_EQ((ssize_t)blksz, aio_return(&wcb)); + + memset(&rcb, 0, sizeof(rcb)); + rcb.aio_fildes = fd; + rcb.aio_buf = rbuf; + rcb.aio_nbytes = blksz; + rcb.aio_offset = 0; + + rv = aio_read(&rcb); + ATF_REQUIRE_EQ(0, rv); + rlist[0] = &rcb; + wait_all(rlist, 1); + + ATF_REQUIRE_EQ(0, aio_error(&rcb)); + ATF_REQUIRE_EQ((ssize_t)blksz, aio_return(&rcb)); + ATF_REQUIRE_EQ(0, memcmp(wbuf, rbuf, blksz)); + + rv = close(fd); + ATF_REQUIRE_EQ(0, rv); + rv = unlink(path); + ATF_REQUIRE_EQ(0, rv); + + free(wbuf); + free(rbuf); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, write_then_read_back); + return atf_no_error(); +} diff --git a/tests/lib/libc/sys/t_aio_suspend.c b/tests/lib/libc/sys/t_aio_suspend.c new file mode 100644 index 0000000000000..6f766d5a0e685 --- /dev/null +++ b/tests/lib/libc/sys/t_aio_suspend.c @@ -0,0 +1,170 @@ +/* $NetBSD: t_aio_suspend.c,v 1.00 2025/08/26 00:00:00 ethan4984 Exp $ */ + +/* + * Copyright (c) 2025 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND + * CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int mktemp_file(char *, size_t); +static void fill_pattern(uint8_t *, size_t, uint8_t); +static void wait_cb(struct aiocb *); + +static int +mktemp_file(char *path, size_t pathlen) +{ + int fd, n; + + n = snprintf(path, pathlen, "t_aio_suspend.XXXXXX"); + ATF_REQUIRE(n > 0 && (size_t)n < pathlen); + + fd = mkstemp(path); + ATF_REQUIRE(fd >= 0); + + return fd; +} + +static void +fill_pattern(uint8_t *buf, size_t len, uint8_t seed) +{ + size_t i; + + for (i = 0; i < len; i++) { + buf[i] = (uint8_t)(seed + (i & 0xff)); + } +} + +static void +wait_cb(struct aiocb *cb) +{ + const struct aiocb *one[1]; + int rv; + + one[0] = cb; + while (aio_error(cb) == EINPROGRESS) { + rv = aio_suspend(one, 1, NULL); + ATF_REQUIRE_EQ(0, rv); + } + if (aio_error(cb) == 0) { + aio_return(cb); + } +} + +ATF_TC_WITHOUT_HEAD(suspend_any); +ATF_TC_BODY(suspend_any, tc) +{ + char path[64]; + int fd, rv; + const size_t blksz = 4096; + uint8_t *buf0, *buf1; + struct aiocb cb0, cb1; + const struct aiocb *list[2]; + int done; + + fd = mktemp_file(path, sizeof(path)); + + buf0 = malloc(blksz); + buf1 = malloc(blksz); + ATF_REQUIRE(buf0 != NULL && buf1 != NULL); + fill_pattern(buf0, blksz, 0x20); + fill_pattern(buf1, blksz, 0x40); + + memset(&cb0, 0, sizeof(cb0)); + cb0.aio_fildes = fd; + cb0.aio_buf = buf0; + cb0.aio_nbytes = blksz; + cb0.aio_offset = 0; + + memset(&cb1, 0, sizeof(cb1)); + cb1.aio_fildes = fd; + cb1.aio_buf = buf1; + cb1.aio_nbytes = blksz; + cb1.aio_offset = blksz; + + ATF_REQUIRE_EQ(0, aio_write(&cb0)); + ATF_REQUIRE_EQ(0, aio_write(&cb1)); + + list[0] = &cb0; + list[1] = &cb1; + + rv = aio_suspend(list, 2, NULL); + ATF_REQUIRE_EQ(0, rv); + + done = 0; + if (aio_error(&cb0) != EINPROGRESS) { + done++; + if (aio_error(&cb0) == 0) { + ATF_REQUIRE_EQ((ssize_t)blksz, aio_return(&cb0)); + } else { + ATF_REQUIRE_EQ(ECANCELED, aio_error(&cb0)); + ATF_REQUIRE_EQ(-1, aio_return(&cb0)); + } + } + if (aio_error(&cb1) != EINPROGRESS) { + done++; + if (aio_error(&cb1) == 0) { + ATF_REQUIRE_EQ((ssize_t)blksz, aio_return(&cb1)); + } else { + ATF_REQUIRE_EQ(ECANCELED, aio_error(&cb1)); + ATF_REQUIRE_EQ(-1, aio_return(&cb1)); + } + } + ATF_REQUIRE(done >= 1); + + if (aio_error(&cb0) == EINPROGRESS) { + wait_cb(&cb0); + } + if (aio_error(&cb1) == EINPROGRESS) { + wait_cb(&cb1); + } + + rv = close(fd); + ATF_REQUIRE_EQ(0, rv); + rv = unlink(path); + ATF_REQUIRE_EQ(0, rv); + + free(buf0); + free(buf1); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, suspend_any); + return atf_no_error(); +} From 0ba5f4f363c47326435cfe834208e718cefc6487 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Wed, 27 Aug 2025 00:17:57 -0600 Subject: [PATCH 47/53] style --- 
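For reviewers: the wait_all()/wait_cb() helpers being restyled in this patch all implement the same POSIX completion-wait idiom, polling aio_error() for EINPROGRESS and parking the thread in aio_suspend() until a listed request finishes. A minimal standalone sketch of that idiom follows (hypothetical file name and buffer size, not part of this series; on NetBSD, link with -lrt -lpthread as the Makefile change in patch 48 does):

#include <aio.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	static char buf[8192];		/* hypothetical block size */
	struct aiocb cb;
	const struct aiocb *list[1];
	ssize_t done;
	int fd;

	fd = open("scratch.bin", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		err(1, "open");

	memset(buf, 0x5a, sizeof(buf));
	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof(buf);
	cb.aio_offset = 0;

	if (aio_write(&cb) == -1)
		err(1, "aio_write");

	/*
	 * The completion-wait idiom: aio_error() returns EINPROGRESS
	 * while the request is pending; aio_suspend() blocks until at
	 * least one listed request completes (or a signal arrives).
	 */
	list[0] = &cb;
	while (aio_error(&cb) == EINPROGRESS) {
		if (aio_suspend(list, 1, NULL) == -1 && errno != EINTR)
			err(1, "aio_suspend");
	}

	/* aio_return() may be called exactly once per completed request. */
	done = aio_return(&cb);
	printf("wrote %zd bytes\n", done);

	close(fd);
	unlink("scratch.bin");
	return 0;
}

Roughly speaking, this loop is what the kernel-side aiowaitgroup machinery services: each aio_suspend() caller joins a wait group that the servicing thread's completion path signals.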
tests/lib/libc/sys/t_aio_cancel.c | 9 ++++----- tests/lib/libc/sys/t_aio_lio.c | 6 +++--- tests/lib/libc/sys/t_aio_suspend.c | 6 +++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/lib/libc/sys/t_aio_cancel.c b/tests/lib/libc/sys/t_aio_cancel.c index 8a83781ec2fb0..25e32b52c02d2 100644 --- a/tests/lib/libc/sys/t_aio_cancel.c +++ b/tests/lib/libc/sys/t_aio_cancel.c @@ -47,7 +47,7 @@ static void fill_pattern(uint8_t *, size_t, uint8_t); static void wait_all(const struct aiocb * const [], size_t); static int -mktemp_file(char *path, size_t pathlen) +mktemp_file (char *path, size_t pathlen) { int fd, n; @@ -61,7 +61,7 @@ mktemp_file(char *path, size_t pathlen) } static void -fill_pattern(uint8_t *buf, size_t len, uint8_t seed) +fill_pattern (uint8_t *buf, size_t len, uint8_t seed) { size_t i; @@ -71,11 +71,10 @@ fill_pattern(uint8_t *buf, size_t len, uint8_t seed) } static void -wait_all(const struct aiocb * const list[], size_t nent) +wait_all (const struct aiocb * const list[], size_t nent) { size_t i; - int pending; - int rv; + int pending, rv; for (;;) { pending = 0; diff --git a/tests/lib/libc/sys/t_aio_lio.c b/tests/lib/libc/sys/t_aio_lio.c index c841c9ed3376a..19c19d39246c4 100644 --- a/tests/lib/libc/sys/t_aio_lio.c +++ b/tests/lib/libc/sys/t_aio_lio.c @@ -46,7 +46,7 @@ static void fill_pattern(uint8_t *, size_t, uint8_t); static void wait_all(const struct aiocb * const [], size_t); static int -mktemp_file(char *path, size_t pathlen) +mktemp_file (char *path, size_t pathlen) { int fd, n; @@ -60,7 +60,7 @@ mktemp_file(char *path, size_t pathlen) } static void -fill_pattern(uint8_t *buf, size_t len, uint8_t seed) +fill_pattern (uint8_t *buf, size_t len, uint8_t seed) { size_t i; @@ -70,7 +70,7 @@ fill_pattern(uint8_t *buf, size_t len, uint8_t seed) } static void -wait_all(const struct aiocb * const list[], size_t nent) +wait_all (const struct aiocb * const list[], size_t nent) { size_t i; int pending, rv; diff --git a/tests/lib/libc/sys/t_aio_suspend.c b/tests/lib/libc/sys/t_aio_suspend.c index 6f766d5a0e685..f62271e16e06e 100644 --- a/tests/lib/libc/sys/t_aio_suspend.c +++ b/tests/lib/libc/sys/t_aio_suspend.c @@ -47,7 +47,7 @@ static void fill_pattern(uint8_t *, size_t, uint8_t); static void wait_cb(struct aiocb *); static int -mktemp_file(char *path, size_t pathlen) +mktemp_file (char *path, size_t pathlen) { int fd, n; @@ -61,7 +61,7 @@ mktemp_file(char *path, size_t pathlen) } static void -fill_pattern(uint8_t *buf, size_t len, uint8_t seed) +fill_pattern (uint8_t *buf, size_t len, uint8_t seed) { size_t i; @@ -71,7 +71,7 @@ fill_pattern(uint8_t *buf, size_t len, uint8_t seed) } static void -wait_cb(struct aiocb *cb) +wait_cb (struct aiocb *cb) { const struct aiocb *one[1]; int rv; From f5ba6f51764dfccfdb07b88b4ee16fed636452e6 Mon Sep 17 00:00:00 2001 From: ethan4984 Date: Wed, 27 Aug 2025 01:25:42 -0600 Subject: [PATCH 48/53] add aio tests to build path --- distrib/sets/lists/tests/mi | 4 ++++ tests/lib/libc/sys/Makefile | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/distrib/sets/lists/tests/mi b/distrib/sets/lists/tests/mi index 8c811d8fba484..f8819bfa9f189 100644 --- a/distrib/sets/lists/tests/mi +++ b/distrib/sets/lists/tests/mi @@ -8102,3 +8102,7 @@ ./usr/tests/util/xlint/lint1/d_zero_sized_arrays.c tests-obsolete obsolete ./usr/tests/util/xlint/lint1/t_integration tests-obsolete obsolete ./var/db/obsolete/tests base-sys-root atf +./usr/tests/lib/libc/sys/t_aio_cancel tests +./usr/tests/lib/libc/sys/t_aio_suspend tests 
+./usr/tests/lib/libc/sys/t_aio_rw tests +./usr/tests/lib/libc/sys/t_aio_lio tests diff --git a/tests/lib/libc/sys/Makefile b/tests/lib/libc/sys/Makefile index bde309956db03..193b9392c2df1 100644 --- a/tests/lib/libc/sys/Makefile +++ b/tests/lib/libc/sys/Makefile @@ -93,9 +93,18 @@ TESTS_C+= t_wait TESTS_C+= t_wait_noproc TESTS_C+= t_wait_noproc_wnohang TESTS_C+= t_write +TESTS_C+= t_aio_cancel +TESTS_C+= t_aio_suspend +TESTS_C+= t_aio_rw +TESTS_C+= t_aio_lio SRCS.t_mprotect= t_mprotect.c ${SRCS_EXEC_PROT} t_mprotect_helper.c +LDADD.t_aio_cancel+= -lrt -lpthread +LDADD.t_aio_suspend+= -lrt -lpthread +LDADD.t_aio_rw+= -lrt -lpthread +LDADD.t_aio_lio+= -lrt -lpthread + LDADD.t_eventfd+= -lpthread LDADD.t_getpid+= -lpthread LDADD.t_mmap+= -lpthread From 0ab62f1cb10f7f869994bfadc5fad91bd5c2bea2 Mon Sep 17 00:00:00 2001 From: Ethan Date: Fri, 29 Aug 2025 20:22:21 -0600 Subject: [PATCH 49/53] more precise job destruction --- sys/kern/sys_aio.c | 169 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 135 insertions(+), 34 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index a5401b56c1f9e..67d80eaa290d1 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -143,6 +143,11 @@ static void aio_job_mark_complete(struct aio_job *); static void aio_file_hold(struct file *); static void aio_file_release(struct file *); +static int aiocbp_lookup_job_locked(struct aiosp *, const void *, + struct aio_job **); +static int aiocbp_remove_job_locked(struct aiosp *, const void *, + struct aio_job **, struct aiocbp **); + static const struct syscall_package aio_syscalls[] = { { SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel }, { SYS_aio_error, 0, (sy_call_t *)sys_aio_error }, @@ -308,17 +313,19 @@ aio_exit(struct proc *p, void *cookie) } /* - * + * Destroy job structure */ static void aio_job_fini (struct aio_job *job) { + mutex_enter(&job->mtx); aiowaitgrouplk_fini(&job->lk); + mutex_exit(&job->mtx); mutex_destroy(&job->mtx); } /* - * + * Mark job as complete */ static void aio_job_mark_complete (struct aio_job *job) @@ -326,6 +333,7 @@ aio_job_mark_complete (struct aio_job *job) mutex_enter(&job->mtx); job->completed = true; aio_file_release(job->fp); + job->fp = NULL; aiowaitgrouplk_flush(&job->lk); mutex_exit(&job->mtx); @@ -481,7 +489,8 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, struct timespec *ts, int flags) { struct aio_job *job; - int error = 0, timo; + struct aiowaitgroup *wg = NULL; + int error = 0, timo = 0; size_t target = 0, monitor = 0; if (ts) { @@ -498,7 +507,7 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, timo = 0; } - struct aiowaitgroup *wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); + wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); aiowaitgroup_init(wg); mutex_enter(&wg->mtx); @@ -507,26 +516,24 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, continue; } - struct aiocbp *aiocbp = NULL; - error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_list[i]); - if (error) { + if ((error = + aiocbp_lookup_job_locked(aiosp, aiocbp_list[i], &job)) != 0) { goto done; } - if (aiocbp == NULL) { + if (job == NULL) { continue; } - job = aiocbp->job; monitor++; - mutex_enter(&job->mtx); if (job->completed) { wg->completed++; wg->total++; + mutex_exit(&job->mtx); } else { aiowaitgroup_join(wg, &job->lk); + mutex_exit(&job->mtx); } - mutex_exit(&job->mtx); } if (!monitor) { @@ -539,7 +546,7 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, target = monitor; } - for (; wg->completed < 
target;) { + for (; wg->completed < target; ) { error = aiowaitgroup_wait(wg, timo); if (error) { goto done; @@ -548,14 +555,12 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, done: wg->active = false; wg->refcnt--; - if (wg->refcnt == 0) { mutex_exit(&wg->mtx); aiowaitgroup_fini(wg); } else { mutex_exit(&wg->mtx); } - return error; } @@ -623,7 +628,9 @@ aiosp_destroy(struct aiosp *sp, int *cn) int error, cnt = 0; for (;;) { - /* peek one worker under sp->mtx. */ + /* + * peek one worker under sp->mtx + */ mutex_enter(&sp->mtx); st = TAILQ_FIRST(&sp->freelist); if (st == NULL) @@ -1110,7 +1117,7 @@ aiosp_validate_conflicts(struct aiosp *aiosp, const void *uptr) if (job && st->job->aiocb_uptr == uptr) { mutex_exit(&aiosp->mtx); return EINVAL; - } else { + } else if (st->fg) { TAILQ_FOREACH(job, &st->fg->queue, list) { if (job->aiocb_uptr == uptr) { mutex_exit(&aiosp->mtx); @@ -1159,25 +1166,31 @@ int aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval) int aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) { - struct aiocbp *aiocbp = NULL; - struct aio_job *job; + struct aiocbp *handle = NULL; + struct aio_job *job = NULL; int error; - error = aiocbp_lookup(aiosp, &aiocbp, uptr); + error = aiocbp_remove_job_locked(aiosp, uptr, &job, &handle); if (error) { return error; } - if (aiocbp == NULL) { + + if (job == NULL) { + if (handle) { + kmem_free(handle, sizeof(*handle)); + } return SET_ERROR(ENOENT); } - job = aiocbp->job; - if (job == NULL || job->aiocbp._state != JOB_DONE) { + if (job->aiocbp._state != JOB_DONE) { + mutex_exit(&job->mtx); + if (handle) { + kmem_free(handle, sizeof(*handle)); + } return SET_ERROR(EINVAL); } *retval = job->aiocbp._retval; - aiocbp_remove(aiosp, uptr); if (job->fp) { aio_file_release(job->fp); @@ -1188,6 +1201,11 @@ aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval) job->aiocbp._retval = -1; job->aiocbp._state = JOB_NONE; + mutex_exit(&job->mtx); + if (handle) { + kmem_free(handle, sizeof(*handle)); + } + aio_job_fini(job); pool_put(&aio_job_pool, job); atomic_dec_uint(&aio_jobs_count); @@ -1207,6 +1225,72 @@ aiocbp_hash(const void *uptr) /* * Find aiocb entry by user pointer. 
*/ +static int +aiocbp_lookup_job_locked(struct aiosp *aiosp, const void *uptr, + struct aio_job **jobp) +{ + struct aiocbp *aiocbp; + struct aio_job *job = NULL; + u_int hash; + + *jobp = NULL; + hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; + + mutex_enter(&aiosp->aio_hash_mtx); + TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { + if (aiocbp->uptr == uptr) { + job = aiocbp->job; + if (job) { + mutex_enter(&job->mtx); + } + + mutex_exit(&aiosp->aio_hash_mtx); + *jobp = job; + return 0; + } + } + mutex_exit(&aiosp->aio_hash_mtx); + + *jobp = NULL; + return 0; +} + +/* + * Detach job and return job with job->mtx held + */ +static int +aiocbp_remove_job_locked(struct aiosp *aiosp, const void *uptr, + struct aio_job **jobp, struct aiocbp **handlep) +{ + struct aiocbp *aiocbp; + struct aio_job *job = NULL; + u_int hash; + + *jobp = NULL; + *handlep = NULL; + hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; + + mutex_enter(&aiosp->aio_hash_mtx); + TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { + if (aiocbp->uptr == uptr) { + job = aiocbp->job; + if (job) { + mutex_enter(&job->mtx); + } + + TAILQ_REMOVE(&aiosp->aio_hash[hash], aiocbp, list); + mutex_exit(&aiosp->aio_hash_mtx); + *handlep = aiocbp; + *jobp = job; + + return 0; + } + } + mutex_exit(&aiosp->aio_hash_mtx); + + return SET_ERROR(ENOENT); +} + int aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) { @@ -1395,15 +1479,39 @@ aiowaitgrouplk_init(struct aiowaitgrouplk *lk) /* * Clean up wait group link resources. + * Caller must hold job->mtx */ void aiowaitgrouplk_fini(struct aiowaitgrouplk *lk) { - mutex_destroy(&lk->mtx); + mutex_enter(&lk->mtx); - if (lk->s) { - kmem_free(lk->wgs, sizeof(*lk->wgs) * lk->s); + for (size_t i = 0; i < lk->n; i++) { + struct aiowaitgroup *wg = lk->wgs[i]; + if (!wg) { + continue; + } + + lk->wgs[i] = NULL; + + mutex_enter(&wg->mtx); + if (--wg->refcnt == 0) { + mutex_exit(&wg->mtx); + aiowaitgroup_fini(wg); + } else { + mutex_exit(&wg->mtx); + } + } + + if (lk->wgs) { + kmem_free(lk->wgs, lk->s * sizeof(*lk->wgs)); } + lk->wgs = NULL; + lk->n = 0; + lk->s = 0; + + mutex_exit(&lk->mtx); + mutex_destroy(&lk->mtx); } /* @@ -1484,10 +1592,6 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) struct sigevent *sig; int error; - /* Non-accurate check for the limit */ - if (aio_jobs_count + 1 > aio_max) - return SET_ERROR(EAGAIN); - /* Get the data structure from user-space */ error = copyin(aiocb_uptr, &aiocb, sizeof(struct aiocb)); if (error) { @@ -1807,7 +1911,7 @@ sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap, return SET_ERROR(EINVAL); } - op = O_DSYNC ? AIO_DSYNC : AIO_SYNC; + op = (op == O_DSYNC) ? 
AIO_DSYNC : AIO_SYNC; return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL); } @@ -1918,9 +2022,6 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, if (nent < 1 || nent > aio_listio_max) { return SET_ERROR(EINVAL); } - if (aio_jobs_count + nent > aio_max) { - return SET_ERROR(EAGAIN); - } /* Check if AIO structure is initialized, if not initialize it */ if (p->p_aio == NULL) { From 8aaf5159797496568e61cff43ce513249cdd3f77 Mon Sep 17 00:00:00 2001 From: Ethan Date: Sat, 30 Aug 2025 16:36:05 -0600 Subject: [PATCH 50/53] more strict on lock ordering --- sys/kern/sys_aio.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 67d80eaa290d1..fc3b6dbace52f 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -62,6 +62,7 @@ * * Implementation notes * io_read/io_write currently use fallback implementations + * lock ordering: aiosp -> aiost -> file_group -> job -> lk -> wg */ #include @@ -510,7 +511,6 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); aiowaitgroup_init(wg); - mutex_enter(&wg->mtx); for (int i = 0; i < nent; i++) { if (aiocbp_list[i] == NULL) { continue; @@ -527,8 +527,10 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, monitor++; if (job->completed) { + mutex_enter(&wg->mtx); wg->completed++; wg->total++; + mutex_exit(&wg->mtx); mutex_exit(&job->mtx); } else { aiowaitgroup_join(wg, &job->lk); @@ -536,6 +538,8 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, } } + mutex_enter(&wg->mtx); + if (!monitor) { goto done; } @@ -1453,8 +1457,10 @@ int aiowaitgroup_wait(struct aiowaitgroup *wg, int timo) { int error; - - error = cv_timedwait_sig(&wg->done_cv, &wg->mtx, timo); + + error = (timo == 0) ? 
+ cv_wait_sig(&wg->done_cv, &wg->mtx) : + cv_timedwait_sig(&wg->done_cv, &wg->mtx, timo); if (error) { if (error == EWOULDBLOCK) { error = SET_ERROR(EAGAIN); @@ -1474,7 +1480,7 @@ aiowaitgrouplk_init(struct aiowaitgrouplk *lk) mutex_init(&lk->mtx, MUTEX_DEFAULT, IPL_NONE); lk->n = 0; lk->s = 2; - lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); + lk->wgs = kmem_zalloc(lk->s * sizeof(*lk->wgs), KM_SLEEP); } /* @@ -1523,15 +1529,16 @@ aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) mutex_enter(&lk->mtx); for (int i = 0; i < lk->n; i++) { struct aiowaitgroup *wg = lk->wgs[i]; + + lk->wgs[i] = NULL; if (wg == NULL) { continue; } mutex_enter(&wg->mtx); - if (wg->active) { wg->completed++; - cv_signal(&wg->done_cv); + cv_broadcast(&wg->done_cv); } if (--wg->refcnt == 0) { @@ -1572,10 +1579,12 @@ aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) lk->s = new_size; lk->wgs = new_wgs; } - lk->wgs[lk->n] = wg; - lk->n++; + lk->wgs[lk->n++] = wg; + + mutex_enter(&wg->mtx); wg->total++; wg->refcnt++; + mutex_exit(&wg->mtx); mutex_exit(&lk->mtx); } From 6f7e3c5f1090727928a166fda38575b5e73a0293 Mon Sep 17 00:00:00 2001 From: Ethan Date: Mon, 1 Sep 2025 15:23:49 -0600 Subject: [PATCH 51/53] clean up and fg mtx --- sys/kern/sys_aio.c | 342 ++++++++++++++++++++++----------------------- sys/sys/aio.h | 1 + 2 files changed, 170 insertions(+), 173 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index fc3b6dbace52f..d874b66ea956d 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -62,7 +62,6 @@ * * Implementation notes * io_read/io_write currently use fallback implementations - * lock ordering: aiosp -> aiost -> file_group -> job -> lk -> wg */ #include @@ -262,19 +261,19 @@ aio_procinit(struct proc *p) int error; /* Allocate and initialize AIO structure */ - aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP); + aio = kmem_zalloc(sizeof(*aio), KM_SLEEP); /* Initialize the service pool */ error = aiosp_initialize(&aio->aiosp); if (error) { - kmem_free(aio, sizeof(struct aioproc)); + kmem_free(aio, sizeof(*aio)); return error; } error = aiocbp_init(&aio->aiosp, 256); if (error) { aiosp_destroy(&aio->aiosp, NULL); - kmem_free(aio, sizeof(struct aioproc)); + kmem_free(aio, sizeof(*aio)); return error; } @@ -302,22 +301,23 @@ aio_exit(struct proc *p, void *cookie) { struct aioproc *aio; - if (cookie != NULL) + if (cookie != NULL) { aio = cookie; - else if ((aio = p->p_aio) == NULL) + } else if ((aio = p->p_aio) == NULL) { return; + } aiocbp_destroy(&aio->aiosp); aiosp_destroy(&aio->aiosp, NULL); mutex_destroy(&aio->aio_mtx); - kmem_free(aio, sizeof(struct aioproc)); + kmem_free(aio, sizeof(*aio)); } /* * Destroy job structure */ static void -aio_job_fini (struct aio_job *job) +aio_job_fini(struct aio_job *job) { mutex_enter(&job->mtx); aiowaitgrouplk_fini(&job->lk); @@ -329,7 +329,7 @@ aio_job_fini (struct aio_job *job) * Mark job as complete */ static void -aio_job_mark_complete (struct aio_job *job) +aio_job_mark_complete(struct aio_job *job) { mutex_enter(&job->mtx); job->completed = true; @@ -346,7 +346,7 @@ aio_job_mark_complete (struct aio_job *job) * Acquire a file reference for async ops */ static void -aio_file_hold (struct file *fp) +aio_file_hold(struct file *fp) { mutex_enter(&fp->f_lock); fp->f_count++; @@ -357,7 +357,7 @@ aio_file_hold (struct file *fp) * Release a file reference for async ops */ static void -aio_file_release (struct file *fp) +aio_file_release(struct file *fp) { mutex_enter(&fp->f_lock); fp->f_count--; @@ -369,6 +369,36 @@ 
aio_file_release (struct file *fp) mutex_exit(&fp->f_lock); } +/* + * Release a job back to the pool + */ +static inline void +aio_job_release(struct aio_job *job) +{ + if (job->fp) { + aio_file_release(job->fp); + job->fp = NULL; + } + + aio_job_fini(job); + pool_put(&aio_job_pool, job); + atomic_dec_uint(&aio_jobs_count); +} + +/* + * Cancel a job pending on aiosp->jobs + */ +static inline void +aio_job_cancel(struct aiosp *aiosp, struct aio_job *job) +{ + mutex_enter(&job->mtx); + TAILQ_REMOVE(&aiosp->jobs, job, list); + aiosp->jobs_pending--; + job->on_queue = false; + job->aiocbp._errno = ECANCELED; + mutex_exit(&job->mtx); +} + /* * Remove file group from tree locked */ @@ -380,6 +410,7 @@ aiosp_fg_teardown_locked(struct aiosp *sp, struct aiost_file_group *fg) } RB_REMOVE(aiost_file_tree, sp->fg_root, fg); + mutex_destroy(&fg->mtx); kmem_free(fg, sizeof(*fg)); } @@ -431,6 +462,7 @@ aiosp_distribute_jobs(struct aiosp *sp) fg = kmem_zalloc(sizeof(*fg), KM_SLEEP); fg->fp = fp; fg->queue_size = 0; + mutex_init(&fg->mtx, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&fg->queue); error = aiosp_worker_extract(sp, &aiost); @@ -462,8 +494,10 @@ aiosp_distribute_jobs(struct aiosp *sp) job->on_queue = false; if (fg) { + mutex_enter(&fg->mtx); TAILQ_INSERT_TAIL(&fg->queue, job, list); fg->queue_size++; + mutex_exit(&fg->mtx); } mutex_enter(&aiost->mtx); @@ -490,22 +524,16 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, struct timespec *ts, int flags) { struct aio_job *job; - struct aiowaitgroup *wg = NULL; + struct aiowaitgroup *wg; int error = 0, timo = 0; - size_t target = 0, monitor = 0; + size_t joined = 0; if (ts) { - timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000)); - if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0) { - timo = 1; - } - + timo = tstohz(ts); if (timo <= 0) { error = SET_ERROR(EAGAIN); return error; } - } else { - timo = 0; } wg = kmem_zalloc(sizeof(*wg), KM_SLEEP); @@ -516,50 +544,45 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent, continue; } - if ((error = - aiocbp_lookup_job_locked(aiosp, aiocbp_list[i], &job)) != 0) { + error = aiocbp_lookup_job_locked(aiosp, aiocbp_list[i], &job); + if (error) { goto done; } if (job == NULL) { continue; } - monitor++; - if (job->completed) { mutex_enter(&wg->mtx); wg->completed++; wg->total++; mutex_exit(&wg->mtx); mutex_exit(&job->mtx); - } else { - aiowaitgroup_join(wg, &job->lk); - mutex_exit(&job->mtx); + continue; } - } - - mutex_enter(&wg->mtx); - if (!monitor) { - goto done; + aiowaitgroup_join(wg, &job->lk); + joined++; + mutex_exit(&job->mtx); } - if (flags & AIOSP_SUSPEND_ANY) { - target = 1; - } else if (flags & AIOSP_SUSPEND_ALL) { - target = monitor; + if (!joined) { + goto done; } - for (; wg->completed < target; ) { + mutex_enter(&wg->mtx); + const size_t target = (flags & AIOSP_SUSPEND_ANY) ? 1 : wg->total; + while (wg->completed < target) { error = aiowaitgroup_wait(wg, timo); if (error) { - goto done; + break; } } + mutex_exit(&wg->mtx); done: + mutex_enter(&wg->mtx); wg->active = false; - wg->refcnt--; - if (wg->refcnt == 0) { + if (--wg->refcnt == 0) { mutex_exit(&wg->mtx); aiowaitgroup_fini(wg); } else { @@ -637,8 +660,9 @@ aiosp_destroy(struct aiosp *sp, int *cn) */ mutex_enter(&sp->mtx); st = TAILQ_FIRST(&sp->freelist); - if (st == NULL) + if (st == NULL) { st = TAILQ_FIRST(&sp->active); + } mutex_exit(&sp->mtx); if (st == NULL) @@ -719,17 +743,16 @@ aiost_create(struct aiosp *sp, struct aiost **ret) * Process single job without coalescing. 
 */
 static void
-aiost_process_singleton (struct aio_job *job)
-{
-	KASSERT(job);
-	if (job->aio_op & AIO_READ) {
-		io_read_fallback(job);
-	} else if (job->aio_op & AIO_WRITE) {
-		io_write_fallback(job);
-	} else if (job->aio_op & AIO_SYNC) {
+aiost_process_singleton(struct aio_job *job)
+{
+	if ((job->aio_op & AIO_READ) == AIO_READ) {
+		io_read(job);
+	} else if ((job->aio_op & AIO_WRITE) == AIO_WRITE) {
+		io_write(job);
+	} else if ((job->aio_op & AIO_SYNC) == AIO_SYNC) {
 		io_sync(job);
 	} else {
-		panic("aio_process: invalid operation code\n");
+		panic("%s: invalid operation code {%x}\n", __func__, job->aio_op);
 	}
 
 	aio_job_mark_complete(job);
@@ -739,26 +762,21 @@
  * Process all jobs in a file group.
  */
 static void
-aiost_process_fg (struct aiosp *sp, struct aiost_file_group *fg)
+aiost_process_fg(struct aiosp *sp, struct aiost_file_group *fg)
 {
-	struct aio_job *job;
-	struct aio_job *tmp;
-
-	TAILQ_FOREACH_SAFE(job, &fg->queue, list, tmp) {
-		TAILQ_REMOVE(&fg->queue, job, list);
-		fg->queue_size--;
-
-		if ((job->aio_op & AIO_READ) == AIO_READ) {
-			io_read(job);
-		} else if ((job->aio_op & AIO_WRITE) == AIO_WRITE) {
-			io_write(job);
-		} else if ((job->aio_op & AIO_SYNC) == AIO_SYNC) {
-			io_sync(job);
-		} else {
-			panic("aio_process: invalid operation code\n");
+	for (struct aio_job *job;;) {
+		mutex_enter(&fg->mtx);
+		job = TAILQ_FIRST(&fg->queue);
+		if (job) {
+			TAILQ_REMOVE(&fg->queue, job, list);
+			fg->queue_size--;
+		}
+		mutex_exit(&fg->mtx);
+		if (job == NULL) {
+			break;
 		}
 
-		aio_job_mark_complete(job);
+		aiost_process_singleton(job);
 	}
 }
@@ -812,12 +830,14 @@ aiost_entry(void *arg)
 		mutex_enter(&st->mtx);
 
 		aiosp_fg_teardown(sp, fg);
-	} else {
+	} else if (st->job) {
 		struct aio_job *job = st->job;
 		mutex_exit(&st->mtx);
 
 		aiost_process_singleton(job);
 		mutex_enter(&st->mtx);
+	} else {
+		KASSERT(0);
 	}
 
 	/*
@@ -853,31 +873,24 @@
 	}
 
 	if (st->job) {
-		if (st->job->fp) {
-			aio_file_release(st->job->fp);
-			st->job->fp = NULL;
-		}
-
-		aio_job_fini(st->job);
-		pool_put(&aio_job_pool, st->job);
-		atomic_dec_uint(&aio_jobs_count);
+		aio_job_release(st->job);
 	} else if (st->fg) {
 		struct aiost_file_group *fg = st->fg;
 		st->fg = NULL;
 
-		while (!TAILQ_EMPTY(&fg->queue)) {
-			struct aio_job *job = TAILQ_FIRST(&fg->queue);
-			TAILQ_REMOVE(&fg->queue, job, list);
-			fg->queue_size--;
-
-			if (job->fp) {
-				aio_file_release(job->fp);
-				job->fp = NULL;
+		for (struct aio_job *job;;) {
+			mutex_enter(&fg->mtx);
+			job = TAILQ_FIRST(&fg->queue);
+			if (job) {
+				TAILQ_REMOVE(&fg->queue, job, list);
+				fg->queue_size--;
+			}
+			mutex_exit(&fg->mtx);
+			if (job == NULL) {
+				break;
 			}
 
-			aio_job_fini(job);
-			pool_put(&aio_job_pool, job);
-			atomic_dec_uint(&aio_jobs_count);
+			aio_job_release(job);
 		}
 
 		aiosp_fg_teardown(sp, fg);
@@ -1122,12 +1135,15 @@ aiosp_validate_conflicts(struct aiosp *aiosp, const void *uptr)
 			mutex_exit(&aiosp->mtx);
 			return EINVAL;
 		} else if (st->fg) {
+			mutex_enter(&st->fg->mtx);
 			TAILQ_FOREACH(job, &st->fg->queue, list) {
 				if (job->aiocb_uptr == uptr) {
+					mutex_exit(&st->fg->mtx);
 					mutex_exit(&aiosp->mtx);
 					return EINVAL;
 				}
 			}
+			mutex_exit(&st->fg->mtx);
 		}
 	}
@@ -1140,7 +1156,8 @@
 /*
  * Get error status of async I/O operation
  */
-int aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval)
+int
+aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval)
 {
 	struct aiocbp *aiocbp = NULL;
 	struct aio_job *job;
@@ -1457,10 +1474,8 @@ int aiowaitgroup_wait(struct
aiowaitgroup *wg, int timo) { int error; - - error = (timo == 0) ? - cv_wait_sig(&wg->done_cv, &wg->mtx) : - cv_timedwait_sig(&wg->done_cv, &wg->mtx, timo); + + error = cv_timedwait_sig(&wg->done_cv, &wg->mtx, timo); if (error) { if (error == EWOULDBLOCK) { error = SET_ERROR(EAGAIN); @@ -1480,7 +1495,7 @@ aiowaitgrouplk_init(struct aiowaitgrouplk *lk) mutex_init(&lk->mtx, MUTEX_DEFAULT, IPL_NONE); lk->n = 0; lk->s = 2; - lk->wgs = kmem_zalloc(lk->s * sizeof(*lk->wgs), KM_SLEEP); + lk->wgs = kmem_alloc(sizeof(*lk->wgs) * lk->s, KM_SLEEP); } /* @@ -1529,16 +1544,15 @@ aiowaitgrouplk_flush(struct aiowaitgrouplk *lk) mutex_enter(&lk->mtx); for (int i = 0; i < lk->n; i++) { struct aiowaitgroup *wg = lk->wgs[i]; - - lk->wgs[i] = NULL; if (wg == NULL) { continue; } mutex_enter(&wg->mtx); + if (wg->active) { wg->completed++; - cv_broadcast(&wg->done_cv); + cv_signal(&wg->done_cv); } if (--wg->refcnt == 0) { @@ -1579,12 +1593,10 @@ aiowaitgroup_join(struct aiowaitgroup *wg, struct aiowaitgrouplk *lk) lk->s = new_size; lk->wgs = new_wgs; } - lk->wgs[lk->n++] = wg; - - mutex_enter(&wg->mtx); + lk->wgs[lk->n] = wg; + lk->n++; wg->total++; wg->refcnt++; - mutex_exit(&wg->mtx); mutex_exit(&lk->mtx); } @@ -1685,18 +1697,16 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) a_job->completed = false; a_job->fp = NULL; - { - const int fd = aiocb.aio_fildes; - struct file *fp = fd_getfile2(p, fd); - if (fp == NULL) { - aio_job_fini(a_job); - pool_put(&aio_job_pool, a_job); - return SET_ERROR(EBADF); - } - - aio_file_hold(fp); - a_job->fp = fp; + const int fd = aiocb.aio_fildes; + struct file *fp = fd_getfile2(p, fd); + if (fp == NULL) { + aio_job_fini(a_job); + pool_put(&aio_job_pool, a_job); + return SET_ERROR(EBADF); } + + aio_file_hold(fp); + a_job->fp = fp; struct aiocbp *aiocbp = kmem_zalloc(sizeof(struct aiocbp), KM_SLEEP); aiocbp->job = a_job; @@ -1718,35 +1728,17 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) mutex_enter(&aio->aio_mtx); if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max || aio->jobs_count >= aio_listio_max) { - atomic_dec_uint(&aio_jobs_count); - mutex_exit(&aio->aio_mtx); - aiocbp_remove(&aio->aiosp, aiocb_uptr); - kmem_free(aiocbp, sizeof(*aiocbp)); - - aio_file_release(a_job->fp); - a_job->fp = NULL; - - aio_job_fini(a_job); - pool_put(&aio_job_pool, a_job); - - return SET_ERROR(EAGAIN); + error = SET_ERROR(EAGAIN); + goto error; } mutex_exit(&aio->aio_mtx); error = aiosp_enqueue_job(&aio->aiosp, a_job); if (error) { - aiocbp_remove(&aio->aiosp, aiocb_uptr); - kmem_free(aiocbp, sizeof(*aiocbp)); - - aio_file_release(a_job->fp); - a_job->fp = NULL; - - aio_job_fini(a_job); - pool_put(&aio_job_pool, a_job); - - return SET_ERROR(error); + error = SET_ERROR(EAGAIN); + goto error; } mutex_enter(&aio->aio_mtx); @@ -1757,6 +1749,18 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio) mutex_exit(&aio->aio_mtx); return 0; +error: + aiocbp_remove(&aio->aiosp, aiocb_uptr); + kmem_free(aiocbp, sizeof(*aiocbp)); + + aio_file_release(a_job->fp); + a_job->fp = NULL; + + aio_job_fini(a_job); + atomic_dec_uint(&aio_jobs_count); + pool_put(&aio_job_pool, a_job); + + return SET_ERROR(error); } /* @@ -1812,6 +1816,11 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, have_active = fg->queue_size ? true : false; } + /* + * if aiocbp_uptr != NULL, then just cancel the job associated with that + * uptr. + * if aiocbp_uptr == NULL, then cancel all jobs associated with fildes. 
+ */ if (aiocbp_uptr) { struct aiocbp *aiocbp = NULL; error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_uptr); @@ -1836,15 +1845,8 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, * beign processed. */ if (job->on_queue) { - mutex_enter(&job->mtx); - TAILQ_REMOVE(&aiosp->jobs, job, list); - aiosp->jobs_pending--; - job->on_queue = false; - job->aiocbp._errno = ECANCELED; - mutex_exit(&job->mtx); - + aio_job_cancel(aiosp, job); aio_job_mark_complete(job); - *retval = AIO_CANCELED; } @@ -1860,39 +1862,32 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, mutex_exit(&aio->aio_mtx); return 0; - } - - /* - * Cancel all queued jobs associated with this file descriptor - */ - struct aio_job *tmp; - TAILQ_FOREACH_SAFE(job, &aiosp->jobs, list, tmp) { - if (job->aiocbp.aio_fildes == (int)fildes) { - mutex_enter(&job->mtx); - TAILQ_REMOVE(&aiosp->jobs, job, list); - aiosp->jobs_pending--; - job->on_queue = false; - job->aiocbp._errno = ECANCELED; - mutex_exit(&job->mtx); - - aio_job_mark_complete(job); - - canceled++; + } else { + /* + * Cancel all queued jobs associated with this file descriptor + */ + struct aio_job *tmp; + TAILQ_FOREACH_SAFE(job, &aiosp->jobs, list, tmp) { + if (job->aiocbp.aio_fildes == (int)fildes) { + aio_job_cancel(aiosp, job); + aio_job_mark_complete(job); + canceled++; + } } - } - if (canceled && !have_active) { - *retval = AIO_CANCELED; - } else if (!canceled) { - *retval = have_active ? AIO_NOTCANCELED : AIO_ALLDONE; - } else { - *retval = AIO_NOTCANCELED; - } + if (canceled && !have_active) { + *retval = AIO_CANCELED; + } else if (!canceled) { + *retval = have_active ? AIO_NOTCANCELED : AIO_ALLDONE; + } else { + *retval = AIO_NOTCANCELED; + } - mutex_exit(&aiosp->mtx); - mutex_exit(&aio->aio_mtx); + mutex_exit(&aiosp->mtx); + mutex_exit(&aio->aio_mtx); - return 0; + return 0; + } } int @@ -2245,7 +2240,8 @@ aio_print_jobs(void (*pr)(const char *, ...)) (*pr)("AIO: pid=%d\n", p->p_pid); (*pr)("AIO: global jobs=%u, proc jobs=%u\n", aio_jobs_count, aio->jobs_count); - (*pr)("AIO: sp{ total_threads=%zu active=%zu free=%zu pending=%zu processing=%lu hash_buckets=%zu mask=%#x }\n", + (*pr)("AIO: sp{ total_threads=%zu active=%zu free=%zu pending=%zu\n" + " processing=%lu hash_buckets=%zu mask=%#x }\n", sp->nthreads_total, sp->nthreads_active, sp->nthreads_free, sp->jobs_pending, (u_long)sp->njobs_processing, sp->aio_hash_size, sp->aio_hash_mask); @@ -2275,8 +2271,8 @@ aio_print_jobs(void (*pr)(const char *, ...)) if (st->job) { struct aio_job *j = st->job; (*pr)(" job: op=%d err=%d state=%d uptr=%p\n", - j->aio_op, j->aiocbp._errno, j->aiocbp._state, - j->aiocb_uptr); + j->aio_op, j->aiocbp._errno, + j->aiocbp._state, j->aiocb_uptr); (*pr)(" fd=%d off=%llu buf=%p nbytes=%zu\n", j->aiocbp.aio_fildes, (unsigned long long)j->aiocbp.aio_offset, diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 91fb493cd9331..1e3c9f7310018 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -138,6 +138,7 @@ struct aiost_file_group { RB_ENTRY(aiost_file_group) tree; struct file *fp; struct aiost *aiost; + kmutex_t mtx; TAILQ_HEAD(, aio_job) queue; size_t queue_size; }; From 5709fe22f8e02af2de1f3e530a6075308c5a4cbd Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 2 Sep 2025 21:50:26 -0600 Subject: [PATCH 52/53] aiocbp hash lookup/removal polish --- sys/kern/sys_aio.c | 178 ++++++++++++++++----------------------------- sys/sys/aio.h | 10 +-- 2 files changed, 63 insertions(+), 125 deletions(-) diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 
d874b66ea956d..055510fb02e97 100644
--- a/sys/kern/sys_aio.c
+++ b/sys/kern/sys_aio.c
@@ -143,9 +143,12 @@ static void aio_job_mark_complete(struct aio_job *);
 static void aio_file_hold(struct file *);
 static void aio_file_release(struct file *);
 
-static int aiocbp_lookup_job_locked(struct aiosp *, const void *,
+static void aiocbp_destroy(struct aiosp *);
+static int aiocbp_init(struct aiosp *, u_int);
+static int aiocbp_insert(struct aiosp *, struct aiocbp *);
+static int aiocbp_lookup_job(struct aiosp *, const void *,
 	struct aio_job **);
-static int aiocbp_remove_job_locked(struct aiosp *, const void *,
+static int aiocbp_remove_job(struct aiosp *, const void *,
 	struct aio_job **, struct aiocbp **);
 
 static const struct syscall_package aio_syscalls[] = {
@@ -230,8 +233,9 @@ aio_init(void)
 	aio_ehook = exithook_establish(aio_exit, NULL);
 
 	error = syscall_establish(NULL, aio_syscalls);
-	if (error != 0)
+	if (error != 0) {
 		aio_fini(false);
+	}
 
 	return error;
 }
@@ -544,7 +548,7 @@ aiosp_suspend(struct aiosp *aiosp, struct aiocb **aiocbp_list, int nent,
 			continue;
 		}
 
-		error = aiocbp_lookup_job_locked(aiosp, aiocbp_list[i], &job);
+		error = aiocbp_lookup_job(aiosp, aiocbp_list[i], &job);
 		if (error) {
 			goto done;
 		}
@@ -1087,7 +1091,7 @@ io_sync(struct aio_job *job)
 
 	job->aiocbp._errno = error;
 	job->aiocbp._state = JOB_DONE;
-	copyout(&job->aiocbp, job->aiocb_uptr, sizeof(struct aiocb));
+	copyout(&job->aiocbp, job->aiocb_uptr, sizeof(job->aiocbp));
 
 	return 0;
 }
@@ -1159,24 +1163,21 @@ aiosp_validate_conflicts(struct aiosp *aiosp, const void *uptr)
 int
 aiosp_error(struct aiosp *aiosp, const void *uptr, register_t *retval)
 {
-	struct aiocbp *aiocbp = NULL;
 	struct aio_job *job;
 	int error = 0;
 
-	error = aiocbp_lookup(aiosp, &aiocbp, uptr);
-	if (error) {
+	error = aiocbp_lookup_job(aiosp, uptr, &job);
+	if (error || job == NULL) {
 		return error;
 	}
-	if (aiocbp == NULL) {
-		return SET_ERROR(ENOENT);
-	}
 
-	job = aiocbp->job;
 	if (job->aiocbp._state == JOB_NONE) {
+		mutex_exit(&job->mtx);
 		return SET_ERROR(EINVAL);
 	}
 
 	*retval = job->aiocbp._errno;
+	mutex_exit(&job->mtx);
 
 	return error;
 }
@@ -1191,7 +1192,7 @@ aiosp_return(struct aiosp *aiosp, const void *uptr, register_t *retval)
 	struct aio_job *job = NULL;
 	int error;
 
-	error = aiocbp_remove_job_locked(aiosp, uptr, &job, &handle);
+	error = aiocbp_remove_job(aiosp, uptr, &job, &handle);
 	if (error) {
 		return error;
 	}
@@ -1244,10 +1245,10 @@ aiocbp_hash(const void *uptr)
 }
 
 /*
- * Find aiocb entry by user pointer.
+ * Find aiocb entry by user pointer and lock its job.
*/ static int -aiocbp_lookup_job_locked(struct aiosp *aiosp, const void *uptr, +aiocbp_lookup_job(struct aiosp *aiosp, const void *uptr, struct aio_job **jobp) { struct aiocbp *aiocbp; @@ -1273,14 +1274,14 @@ aiocbp_lookup_job_locked(struct aiosp *aiosp, const void *uptr, mutex_exit(&aiosp->aio_hash_mtx); *jobp = NULL; - return 0; + return SET_ERROR(ENOENT); } /* * Detach job and return job with job->mtx held */ static int -aiocbp_remove_job_locked(struct aiosp *aiosp, const void *uptr, +aiocbp_remove_job(struct aiosp *aiosp, const void *uptr, struct aio_job **jobp, struct aiocbp **handlep) { struct aiocbp *aiocbp; @@ -1288,7 +1289,9 @@ aiocbp_remove_job_locked(struct aiosp *aiosp, const void *uptr, u_int hash; *jobp = NULL; - *handlep = NULL; + if (handlep) { + *handlep = NULL; + } hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; mutex_enter(&aiosp->aio_hash_mtx); @@ -1301,7 +1304,9 @@ aiocbp_remove_job_locked(struct aiosp *aiosp, const void *uptr, TAILQ_REMOVE(&aiosp->aio_hash[hash], aiocbp, list); mutex_exit(&aiosp->aio_hash_mtx); - *handlep = aiocbp; + if (handlep) { + *handlep = aiocbp; + } *jobp = job; return 0; @@ -1312,53 +1317,6 @@ aiocbp_remove_job_locked(struct aiosp *aiosp, const void *uptr, return SET_ERROR(ENOENT); } -int -aiocbp_lookup(struct aiosp *aiosp, struct aiocbp **aiocbpp, const void *uptr) -{ - struct aiocbp *aiocbp; - u_int hash; - - hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; - - mutex_enter(&aiosp->aio_hash_mtx); - TAILQ_FOREACH(aiocbp, &aiosp->aio_hash[hash], list) { - if (aiocbp->uptr == uptr) { - *aiocbpp = aiocbp; - mutex_exit(&aiosp->aio_hash_mtx); - return 0; - } - } - mutex_exit(&aiosp->aio_hash_mtx); - - *aiocbpp = NULL; - return 0; -} - -/* - * Remove aiocb entry from hash table. - */ -int -aiocbp_remove(struct aiosp *aiosp, const void *uptr) -{ - struct aiocbp *aiocbp; - u_int hash; - - hash = aiocbp_hash(uptr) & aiosp->aio_hash_mask; - - struct aiocbp *tmp; - mutex_enter(&aiosp->aio_hash_mtx); - TAILQ_FOREACH_SAFE(aiocbp, &aiosp->aio_hash[hash], list, tmp) { - if (aiocbp->uptr == uptr) { - TAILQ_REMOVE(&aiosp->aio_hash[hash], aiocbp, list); - mutex_exit(&aiosp->aio_hash_mtx); - return 0; - } - } - mutex_exit(&aiosp->aio_hash_mtx); - - return SET_ERROR(ENOENT); -} - /* * Insert aiocb entry into hash table. 
 */
@@ -1614,7 +1572,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
 	int error;
 
 	/* Get the data structure from user-space */
-	error = copyin(aiocb_uptr, &aiocb, sizeof(struct aiocb));
+	error = copyin(aiocb_uptr, &aiocb, sizeof(aiocb));
 	if (error) {
 		return error;
 	}
@@ -1678,7 +1636,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
 	aiocb._state = JOB_WIP;
 	aiocb._errno = SET_ERROR(EINPROGRESS);
 	aiocb._retval = -1;
-	error = copyout(&aiocb, aiocb_uptr, sizeof(struct aiocb));
+	error = copyout(&aiocb, aiocb_uptr, sizeof(aiocb));
 	if (error) {
 		return error;
 	}
@@ -1686,7 +1644,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
 	/* Allocate and initialize a new AIO job */
 	a_job = pool_get(&aio_job_pool, PR_WAITOK | PR_ZERO);
-	memcpy(&a_job->aiocbp, &aiocb, sizeof(struct aiocb));
+	memcpy(&a_job->aiocbp, &aiocb, sizeof(aiocb));
 	a_job->aiocb_uptr = aiocb_uptr;
 	a_job->aio_op |= op;
 	a_job->lio = lio;
@@ -1708,7 +1666,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
 	aio_file_hold(fp);
 	a_job->fp = fp;
 
-	struct aiocbp *aiocbp = kmem_zalloc(sizeof(struct aiocbp), KM_SLEEP);
+	struct aiocbp *aiocbp = kmem_zalloc(sizeof(*aiocbp), KM_SLEEP);
 	aiocbp->job = a_job;
 	aiocbp->uptr = aiocb_uptr;
 	error = aiocbp_insert(&aio->aiosp, aiocbp);
@@ -1750,7 +1708,7 @@ aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
 	return 0;
 
 error:
-	aiocbp_remove(&aio->aiosp, aiocb_uptr);
+	aiocbp_remove_job(&aio->aiosp, aiocb_uptr, &a_job, NULL);
 	kmem_free(aiocbp, sizeof(*aiocbp));
 
 	aio_file_release(a_job->fp);
@@ -1781,7 +1739,7 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
 	unsigned int fildes, canceled = 0;
 	bool have_active = false;
 	fdtab_t *dt;
-	int error;
+	int error = 0;
 
 	fildes = (unsigned int)SCARG(uap, fildes);
 	dt = atomic_load_consume(&fdp->fd_dt);
@@ -1822,46 +1780,31 @@
 	 * if aiocbp_uptr == NULL, then cancel all jobs associated with fildes.
 	 */
 	if (aiocbp_uptr) {
-		struct aiocbp *aiocbp = NULL;
-		error = aiocbp_lookup(aiosp, &aiocbp, aiocbp_uptr);
-		if (error) {
-			mutex_exit(&aiosp->mtx);
-			mutex_exit(&aio->aio_mtx);
-			return error;
+		error = aiocbp_lookup_job(aiosp, aiocbp_uptr, &job);
+		if (error || job == NULL) {
+			*retval = AIO_ALLDONE;
+			goto finish;
 		}
 
-		if (aiocbp) {
-			job = aiocbp->job;
-
-			if (job->completed) {
-				*retval = AIO_ALLDONE;
-			} else {
-				*retval = AIO_NOTCANCELED;
-			}
+		if (job->completed) {
+			*retval = AIO_ALLDONE;
+		} else {
+			*retval = AIO_NOTCANCELED;
+		}
 
-			/*
-			 * If the job is on sp->job (signified by job->on_queue)
-			 * that means that it has been distribtued yet. And if
-			 * it is not on the queue that means it is currently
-			 * beign processed.
-			 */
-			if (job->on_queue) {
-				aio_job_cancel(aiosp, job);
-				aio_job_mark_complete(job);
-				*retval = AIO_CANCELED;
-			}
-
-			mutex_exit(&aiosp->mtx);
-			mutex_exit(&aio->aio_mtx);
-
-			return 0;
+		/*
+		 * If the job is on sp->jobs (signified by job->on_queue)
+		 * that means that it has not yet been distributed. And if
+		 * it is not on the queue that means it is currently
+		 * being processed.
+ */ + if (job->on_queue) { + aio_job_cancel(aiosp, job); + aio_job_mark_complete(job); + *retval = AIO_CANCELED; + } - return 0; + mutex_exit(&job->mtx); } else { /* * Cancel all queued jobs associated with this file descriptor @@ -1882,12 +1825,12 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, } else { *retval = AIO_NOTCANCELED; } - - mutex_exit(&aiosp->mtx); - mutex_exit(&aio->aio_mtx); - - return 0; } +finish: + mutex_exit(&aiosp->mtx); + mutex_exit(&aio->aio_mtx); + + return 0; } int @@ -1974,7 +1917,7 @@ sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap, if (SCARG(uap, timeout)) { /* Convert timespec to ticks */ error = copyin(SCARG(uap, timeout), &ts, - sizeof(struct timespec)); + sizeof(ts)); if (error) return error; } @@ -2042,7 +1985,7 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, switch (mode) { case LIO_WAIT: - memset(&lio->sig, 0, sizeof(struct sigevent)); + memset(&lio->sig, 0, sizeof(lio->sig)); break; case LIO_NOWAIT: /* Check for signal, validate it */ @@ -2050,15 +1993,16 @@ sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap, struct sigevent *sig = &lio->sig; error = copyin(SCARG(uap, sig), &lio->sig, - sizeof(struct sigevent)); + sizeof(lio->sig)); if (error == 0 && (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG || sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA)) error = SET_ERROR(EINVAL); - } else - memset(&lio->sig, 0, sizeof(struct sigevent)); + } else { + memset(&lio->sig, 0, sizeof(lio->sig)); + } break; default: error = SET_ERROR(EINVAL); diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 1e3c9f7310018..183314634ee01 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -214,14 +214,8 @@ int aiosp_suspend(struct aiosp *, struct aiocb **, int, struct timespec *, int); int aiosp_flush(struct aiosp *); int aiosp_validate_conflicts(struct aiosp *, const void *); -int aiosp_error (struct aiosp *, const void *, register_t *); -int aiosp_return (struct aiosp *, const void *, register_t *); - -void aiocbp_destroy(struct aiosp *); -int aiocbp_init(struct aiosp *, u_int); -int aiocbp_lookup(struct aiosp *, struct aiocbp **, const void *); -int aiocbp_remove(struct aiosp *, const void *); -int aiocbp_insert(struct aiosp *, struct aiocbp *); +int aiosp_error(struct aiosp *, const void *, register_t *); +int aiosp_return(struct aiosp *, const void *, register_t *); void aiowaitgroup_init(struct aiowaitgroup *); void aiowaitgroup_fini(struct aiowaitgroup *); From cc4875ac7daa24eb4c9a477d18d3615a665bbabc Mon Sep 17 00:00:00 2001 From: Ethan Date: Tue, 2 Sep 2025 22:38:14 -0600 Subject: [PATCH 53/53] sys_aio: ensure test source abides by style --- tests/lib/libc/sys/t_aio_cancel.c | 6 +++--- tests/lib/libc/sys/t_aio_lio.c | 6 +++--- tests/lib/libc/sys/t_aio_rw.c | 6 +++--- tests/lib/libc/sys/t_aio_suspend.c | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/lib/libc/sys/t_aio_cancel.c b/tests/lib/libc/sys/t_aio_cancel.c index 25e32b52c02d2..64bdd43561d30 100644 --- a/tests/lib/libc/sys/t_aio_cancel.c +++ b/tests/lib/libc/sys/t_aio_cancel.c @@ -47,7 +47,7 @@ static void fill_pattern(uint8_t *, size_t, uint8_t); static void wait_all(const struct aiocb * const [], size_t); static int -mktemp_file (char *path, size_t pathlen) +mktemp_file(char *path, size_t pathlen) { int fd, n; @@ -61,7 +61,7 @@ mktemp_file (char *path, size_t pathlen) } static void -fill_pattern (uint8_t *buf, size_t len, uint8_t seed) +fill_pattern(uint8_t *buf, size_t len, 
uint8_t seed) { size_t i; @@ -71,7 +71,7 @@ fill_pattern (uint8_t *buf, size_t len, uint8_t seed) } static void -wait_all (const struct aiocb * const list[], size_t nent) +wait_all(const struct aiocb * const list[], size_t nent) { size_t i; int pending, rv; diff --git a/tests/lib/libc/sys/t_aio_lio.c b/tests/lib/libc/sys/t_aio_lio.c index 19c19d39246c4..c841c9ed3376a 100644 --- a/tests/lib/libc/sys/t_aio_lio.c +++ b/tests/lib/libc/sys/t_aio_lio.c @@ -46,7 +46,7 @@ static void fill_pattern(uint8_t *, size_t, uint8_t); static void wait_all(const struct aiocb * const [], size_t); static int -mktemp_file (char *path, size_t pathlen) +mktemp_file(char *path, size_t pathlen) { int fd, n; @@ -60,7 +60,7 @@ mktemp_file (char *path, size_t pathlen) } static void -fill_pattern (uint8_t *buf, size_t len, uint8_t seed) +fill_pattern(uint8_t *buf, size_t len, uint8_t seed) { size_t i; @@ -70,7 +70,7 @@ fill_pattern (uint8_t *buf, size_t len, uint8_t seed) } static void -wait_all (const struct aiocb * const list[], size_t nent) +wait_all(const struct aiocb * const list[], size_t nent) { size_t i; int pending, rv; diff --git a/tests/lib/libc/sys/t_aio_rw.c b/tests/lib/libc/sys/t_aio_rw.c index f2491eb254bf6..0c47d0f76c338 100644 --- a/tests/lib/libc/sys/t_aio_rw.c +++ b/tests/lib/libc/sys/t_aio_rw.c @@ -47,7 +47,7 @@ static void fill_pattern(uint8_t *, size_t, uint8_t); static void wait_all(const struct aiocb * const [], size_t); static int -mktemp_file (char *path, size_t pathlen) +mktemp_file(char *path, size_t pathlen) { int fd, n; @@ -61,7 +61,7 @@ mktemp_file (char *path, size_t pathlen) } static void -fill_pattern (uint8_t *buf, size_t len, uint8_t seed) +fill_pattern(uint8_t *buf, size_t len, uint8_t seed) { size_t i; @@ -71,7 +71,7 @@ fill_pattern (uint8_t *buf, size_t len, uint8_t seed) } static void -wait_all (const struct aiocb * const list[], size_t nent) +wait_all(const struct aiocb * const list[], size_t nent) { size_t i; int pending, rv, error; diff --git a/tests/lib/libc/sys/t_aio_suspend.c b/tests/lib/libc/sys/t_aio_suspend.c index f62271e16e06e..6f766d5a0e685 100644 --- a/tests/lib/libc/sys/t_aio_suspend.c +++ b/tests/lib/libc/sys/t_aio_suspend.c @@ -47,7 +47,7 @@ static void fill_pattern(uint8_t *, size_t, uint8_t); static void wait_cb(struct aiocb *); static int -mktemp_file (char *path, size_t pathlen) +mktemp_file(char *path, size_t pathlen) { int fd, n; @@ -61,7 +61,7 @@ mktemp_file (char *path, size_t pathlen) } static void -fill_pattern (uint8_t *buf, size_t len, uint8_t seed) +fill_pattern(uint8_t *buf, size_t len, uint8_t seed) { size_t i; @@ -71,7 +71,7 @@ fill_pattern (uint8_t *buf, size_t len, uint8_t seed) } static void -wait_cb (struct aiocb *cb) +wait_cb(struct aiocb *cb) { const struct aiocb *one[1]; int rv;
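A closing note on the cancellation semantics reworked in patches 49-52: aio_cancel() reports one of three outcomes, and the kernel-side job->on_queue test above is what selects between them. A minimal userland sketch of handling all three (hypothetical file name and buffer size, not part of this series; link with -lrt -lpthread):

#include <aio.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	static char buf[1 << 16];	/* hypothetical block size */
	const struct aiocb *list[1];
	struct aiocb cb;
	int fd;

	fd = open("scratch.bin", O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (fd == -1)
		err(1, "open");

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof(buf);
	cb.aio_offset = 0;

	if (aio_write(&cb) == -1)
		err(1, "aio_write");

	switch (aio_cancel(fd, &cb)) {
	case AIO_CANCELED:
		/* Job was still queued; the kernel removed it. */
		printf("canceled, aio_error=%d (expect ECANCELED=%d)\n",
		    aio_error(&cb), ECANCELED);
		(void)aio_return(&cb);		/* reap the control block */
		break;
	case AIO_NOTCANCELED:
		/* A servicing thread already owns the job; wait it out. */
		list[0] = &cb;
		while (aio_error(&cb) == EINPROGRESS)
			(void)aio_suspend(list, 1, NULL);
		(void)aio_return(&cb);
		break;
	case AIO_ALLDONE:
		/* Job completed before the cancel request arrived. */
		(void)aio_return(&cb);
		break;
	default:
		err(1, "aio_cancel");
	}

	close(fd);
	unlink("scratch.bin");
	return 0;
}

Only jobs still sitting on the pending queue can be canceled; once a servicing thread has dequeued a job, the caller must wait for it to finish, which is why AIO_NOTCANCELED still requires the usual aio_error()/aio_return() reaping.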