| /*- |
| * Copyright (c) 1992, 1993 |
| * The Regents of the University of California. All rights reserved. |
| * |
| * This code is derived from software donated to Berkeley by |
| * Jan-Simon Pendry. |
| * |
| * Copyright 2020-2026 Rivoreo |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. Neither the name of the University nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| #include <sys/param.h> |
| #include <sys/systm.h> |
| #include <sys/capsicum.h> |
| #include <sys/conf.h> |
| #include <sys/dirent.h> |
| #include <sys/filedesc.h> |
| #include <sys/kernel.h> /* boottime */ |
| #include <sys/lock.h> |
| #include <sys/mutex.h> |
| #include <sys/malloc.h> |
| #include <sys/file.h> /* Must come after sys/malloc.h */ |
| #include <sys/mount.h> |
| #include <sys/namei.h> |
| #include <sys/proc.h> |
| #include <sys/stat.h> |
| #include <sys/vnode.h> |
| #include "fdfs.h" |
| |
| #define NFDCACHE 4 |
| #define FD_NHASH(ix) \ |
| (&fdhashtbl[(ix) & fdhash]) |
| static LIST_HEAD(fdhashhead, fdfsnode) *fdhashtbl; |
| static u_long fdhash; |
| |
| struct mtx fdfs_hashmtx; |
| |
| static vop_getattr_t fdfs_getattr; |
| static vop_lookup_t fdfs_lookup; |
| static vop_open_t fdfs_open; |
| static vop_readdir_t fdfs_readdir; |
| static vop_reclaim_t fdfs_reclaim; |
| static vop_setattr_t fdfs_setattr; |
| |
| static struct vop_vector fdfs_vnodeops = { |
| .vop_default = &default_vnodeops, |
| .vop_access = VOP_NULL, |
| .vop_getattr = fdfs_getattr, |
| .vop_lookup = fdfs_lookup, |
| .vop_open = fdfs_open, |
| .vop_pathconf = vop_stdpathconf, |
| .vop_readdir = fdfs_readdir, |
| .vop_reclaim = fdfs_reclaim, |
| .vop_setattr = fdfs_setattr, |
| }; |
| |
| static void fdfs_insmntque_dtr(struct vnode *, void *); |
| static void fdfs_remove_entry(struct fdfsnode *); |
| |
| /* |
| * Initialise cache headers |
| */ |
| int |
| fdfs_init(struct vfsconf *vfsp) |
| { |
| mtx_init(&fdfs_hashmtx, "fdfs_hash", NULL, MTX_DEF); |
| fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash); |
| return (0); |
| } |
| |
| /* |
| * Uninit ready for unload. |
| */ |
| int |
| fdfs_uninit(struct vfsconf *vfsp) |
| { |
| hashdestroy(fdhashtbl, M_CACHE, fdhash); |
| mtx_destroy(&fdfs_hashmtx); |
| return (0); |
| } |
| |
/*
 * Destructor passed to insmntque1() by fdfs_allocvp().  It disposes of the
 * vnode that could not be attached to the mount: the vnode is killed with
 * vgone() and then unlocked and released with vput().
 *
 * arg is not used.
 */
static void
fdfs_insmntque_dtr(struct vnode *vp, void *arg)
{

	vgone(vp);
	vput(vp);
}
| |
| /* |
| * Remove an entry from the hash if it exists. |
| */ |
| static void |
| fdfs_remove_entry(struct fdfsnode *fd) |
| { |
| struct fdfsnode *fd2; |
| struct fdhashhead *fc = FD_NHASH(fd->fd_ix); |
| mtx_lock(&fdfs_hashmtx); |
| LIST_FOREACH(fd2, fc, fd_hash) { |
| if (fd == fd2) { |
| LIST_REMOVE(fd, fd_hash); |
| break; |
| } |
| } |
| mtx_unlock(&fdfs_hashmtx); |
| } |
| |
| static enum vtype fdfs_get_fd_type(unsigned int fd) { |
| cap_rights_t rights; |
| struct file *fp; |
| int e = fget(curthread, fd, cap_rights_init(&rights), &fp); |
| if(e) return VCHR; |
| |
| enum vtype vtype; |
| switch(fp->f_type) { |
| case DTYPE_VNODE: |
| vtype = fp->f_vnode ? fp->f_vnode->v_type : VCHR; |
| break; |
| case DTYPE_SOCKET: |
| vtype = VSOCK; |
| break; |
| case DTYPE_PIPE: |
| case DTYPE_FIFO: |
| vtype = VFIFO; |
| break; |
| default: |
| vtype = VCHR; |
| break; |
| } |
| fdrop(fp, curthread); |
| return vtype; |
| } |
| |
| int |
| fdfs_allocvp(fdntype ftype, unsigned int fd_fd, int ix, struct mount *mp, |
| struct vnode **vpp) |
| { |
| struct fdhashhead *fc; |
| struct fdfsnode *fd, *fd2; |
| struct vnode *vp, *vp2; |
| struct thread *td; |
| int error = 0; |
| |
| td = curthread; |
| fc = FD_NHASH(ix); |
| loop: |
| mtx_lock(&fdfs_hashmtx); |
| /* |
| * If a forced unmount is progressing, we need to drop it. The flags are |
| * protected by the hashmtx. |
| */ |
| struct fdfsmount *fmp = (struct fdfsmount *)mp->mnt_data; |
| if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) { |
| mtx_unlock(&fdfs_hashmtx); |
| return (-1); |
| } |
| |
| LIST_FOREACH(fd, fc, fd_hash) { |
| if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) { |
| /* Get reference to vnode in case it's being free'd */ |
| vp = fd->fd_vnode; |
| VI_LOCK(vp); |
| mtx_unlock(&fdfs_hashmtx); |
| if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) |
| goto loop; |
| *vpp = vp; |
| return (0); |
| } |
| } |
| mtx_unlock(&fdfs_hashmtx); |
| |
| fd = malloc(sizeof(struct fdfsnode), M_TEMP, M_WAITOK); |
| |
| error = getnewvnode("fdfs", mp, &fdfs_vnodeops, &vp); |
| if (error) { |
| free(fd, M_TEMP); |
| return (error); |
| } |
| vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| vp->v_data = fd; |
| if(ftype == Froot) vp->v_type = VDIR; |
| fd->fd_vnode = vp; |
| fd->fd_type = ftype; |
| fd->fd_fd = fd_fd; |
| fd->fd_ix = ix; |
| error = insmntque1(vp, mp, fdfs_insmntque_dtr, NULL); |
| if (error != 0) { |
| *vpp = NULLVP; |
| return (error); |
| } |
| |
| /* Make sure that someone didn't beat us when inserting the vnode. */ |
| mtx_lock(&fdfs_hashmtx); |
| /* |
| * If a forced unmount is progressing, we need to drop it. The flags are |
| * protected by the hashmtx. |
| */ |
| fmp = (struct fdfsmount *)mp->mnt_data; |
| if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) { |
| mtx_unlock(&fdfs_hashmtx); |
| vgone(vp); |
| vput(vp); |
| *vpp = NULLVP; |
| return (-1); |
| } |
| |
| LIST_FOREACH(fd2, fc, fd_hash) { |
| if (fd2->fd_ix == ix && fd2->fd_vnode->v_mount == mp) { |
| /* Get reference to vnode in case it's being free'd */ |
| vp2 = fd2->fd_vnode; |
| VI_LOCK(vp2); |
| mtx_unlock(&fdfs_hashmtx); |
| error = vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, td); |
| /* Someone beat us, dec use count and wait for reclaim */ |
| vgone(vp); |
| vput(vp); |
| /* If we didn't get it, return no vnode. */ |
| if (error) vp2 = NULLVP; |
| *vpp = vp2; |
| return (error); |
| } |
| } |
| |
| /* If we came here, we can insert it safely. */ |
| LIST_INSERT_HEAD(fc, fd, fd_hash); |
| mtx_unlock(&fdfs_hashmtx); |
| *vpp = vp; |
| return (0); |
| } |
| |
| struct fdfs_get_ino_args { |
| fdntype ftype; |
| unsigned int fd_fd; |
| int ix; |
| struct file *fp; |
| struct thread *td; |
| }; |
| |
| static int |
| fdfs_get_ino_alloc(struct mount *mp, void *arg, int lkflags, |
| struct vnode **rvp) |
| { |
| struct fdfs_get_ino_args *a; |
| int error; |
| |
| a = arg; |
| error = fdfs_allocvp(a->ftype, a->fd_fd, a->ix, mp, rvp); |
| fdrop(a->fp, a->td); |
| return (error); |
| } |
| |
| |
| /* |
| * vp is the current namei directory |
| * ndp is the name to locate in that directory... |
| */ |
| static int |
| fdfs_lookup(struct vop_lookup_args *ap) |
| { |
| struct vnode **vpp = ap->a_vpp; |
| struct vnode *dvp = ap->a_dvp; |
| struct componentname *cnp = ap->a_cnp; |
| char *pname = cnp->cn_nameptr; |
| struct thread *td = cnp->cn_thread; |
| struct file *fp; |
| struct fdfs_get_ino_args arg; |
| cap_rights_t rights; |
| int nlen = cnp->cn_namelen; |
| u_int fd, fd1; |
| int error; |
| struct vnode *fvp; |
| |
| if ((cnp->cn_flags & ISLASTCN) && |
| (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { |
| error = EROFS; |
| goto bad; |
| } |
| |
| #if 0 |
| if (VTOFDFS(dvp)->fd_type == Fdesc) { |
| error = fget(td, VTOFDFS(dvp)->fd_fd, cap_rights_init(&rights), &fp); |
| if(error) goto bad; |
| |
| if(fp->f_type != DTYPE_VNODE || !fp->f_vnode) { |
| fdrop(fp, td); |
| error = ENOTDIR; |
| goto bad; |
| } |
| |
| struct vnode *dirvp = fp->f_vnode; |
| vref(dirvp); |
| fdrop(fp, td); |
| |
| vn_lock(dirvp, LK_EXCLUSIVE | LK_RETRY); |
| error = VOP_LOOKUP(dirvp, vpp, cnp); |
| VOP_UNLOCK(dirvp, 0); |
| vrele(dirvp); |
| return error; |
| } |
| #endif |
| |
| if (cnp->cn_namelen == 1 && *pname == '.') { |
| *vpp = dvp; |
| VREF(dvp); |
| return (0); |
| } |
| |
| if (VTOFDFS(dvp)->fd_type != Froot) { |
| error = ENOTDIR; |
| goto bad; |
| } |
| |
| fd = 0; |
| /* the only time a leading 0 is acceptable is if it's "0" */ |
| if (*pname == '0' && nlen != 1) { |
| error = ENOENT; |
| goto bad; |
| } |
| while (nlen--) { |
| if (*pname < '0' || *pname > '9') { |
| error = ENOENT; |
| goto bad; |
| } |
| fd1 = 10 * fd + *pname++ - '0'; |
| if (fd1 < fd) { |
| error = ENOENT; |
| goto bad; |
| } |
| fd = fd1; |
| } |
| |
| /* |
| * No rights to check since 'fp' isn't actually used. |
| */ |
| error = fget(td, fd, cap_rights_init(&rights), &fp); |
| if(error) goto bad; |
| |
| // Use the underlying vnode if available |
| if(fp->f_type == DTYPE_VNODE && fp->f_vnode) { |
| fvp = fp->f_vnode; |
| if(fvp == dvp) { |
| vref(fvp); |
| fdrop(fp, td); |
| *vpp = fvp; |
| return 0; |
| } |
| vhold(dvp); |
| VOP_UNLOCK(dvp, 0); |
| vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); |
| vref(fvp); |
| fdrop(fp, td); |
| vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); |
| vdrop(dvp); |
| if(dvp->v_iflag & VI_DOOMED) { |
| vput(fvp); |
| error = ENOENT; |
| goto bad; |
| } |
| *vpp = fvp; |
| return 0; |
| } |
| |
| /* Check if we're looking up ourselves. */ |
| if (VTOFDFS(dvp)->fd_ix == FD_DESC + fd) { |
| /* |
| * In case we're holding the last reference to the file, the dvp |
| * will be re-acquired. |
| */ |
| vhold(dvp); |
| VOP_UNLOCK(dvp, 0); |
| fdrop(fp, td); |
| |
| /* Re-aquire the lock afterwards. */ |
| vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE); |
| vdrop(dvp); |
| fvp = dvp; |
| if ((dvp->v_iflag & VI_DOOMED) != 0) |
| error = ENOENT; |
| } else { |
| /* |
| * Unlock our root node (dvp) when doing this, since we might |
| * deadlock since the vnode might be locked by another thread |
| * and the root vnode lock will be obtained afterwards (in case |
| * we're looking up the fd of the root vnode), which will be the |
| * opposite lock order. Vhold the root vnode first so we don't |
| * lose it. |
| */ |
| arg.ftype = Fdesc; |
| arg.fd_fd = fd; |
| arg.ix = FD_DESC + fd; |
| arg.fp = fp; |
| arg.td = td; |
| error = vn_vget_ino_gen(dvp, fdfs_get_ino_alloc, &arg, |
| LK_EXCLUSIVE, &fvp); |
| } |
| |
| if (error) |
| goto bad; |
| *vpp = fvp; |
| return (0); |
| |
| bad: |
| *vpp = NULL; |
| return (error); |
| } |
| |
| static int |
| fdfs_open(struct vop_open_args *ap) |
| { |
| struct vnode *vp = ap->a_vp; |
| |
| if (VTOFDFS(vp)->fd_type == Froot) return (0); |
| |
| /* |
| * XXX Kludge: set td->td_proc->p_dupfd to contain the value of the file |
| * descriptor being sought for duplication. The error return ensures |
| * that the vnode for this device will be released by vn_open. Open |
| * will detect this special error and take the actions in dupfdopen. |
| * Other callers of vn_open or VOP_OPEN will simply report the |
| * error. |
| */ |
| ap->a_td->td_dupfd = VTOFDFS(vp)->fd_fd; /* XXX */ |
| return (ENODEV); |
| } |
| |
| static int |
| fdfs_getattr(struct vop_getattr_args *ap) { |
| struct vnode *vp = ap->a_vp; |
| struct vattr *vap = ap->a_vap; |
| struct thread *td = curthread; |
| |
| struct bintime boot_time; |
| getboottimebin(&boot_time); |
| |
| vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; |
| vap->va_fileid = VTOFDFS(vp)->fd_ix; |
| vap->va_blocksize = DEV_BSIZE; |
| bintime2timespec(&boot_time, &vap->va_atime); |
| vap->va_mtime = vap->va_atime; |
| vap->va_ctime = vap->va_mtime; |
| vap->va_birthtime = vap->va_ctime; |
| vap->va_gen = 0; |
| vap->va_flags = 0; |
| vap->va_bytes = 0; |
| vap->va_filerev = 0; |
| |
| switch (VTOFDFS(vp)->fd_type) { |
| struct file *f; |
| cap_rights_t rights; |
| int e; |
| |
| case Froot: |
| vap->va_type = VDIR; |
| vap->va_nlink = 2; |
| vap->va_size = DEV_BSIZE; |
| vap->va_rdev = NODEV; |
| vap->va_uid = 0; |
| vap->va_gid = 0; |
| break; |
| |
| case Fdesc: |
| vap->va_type = fdfs_get_fd_type(VTOFDFS(vp)->fd_fd); |
| vap->va_nlink = 1; |
| vap->va_size = 0; |
| vap->va_rdev = makedev(0, vap->va_fileid); |
| vap->va_uid = td->td_ucred->cr_uid; |
| vap->va_gid = td->td_ucred->cr_gid; |
| |
| e = fget(td, VTOFDFS(vp)->fd_fd, cap_rights_init(&rights), &f); |
| if(!e) { |
| e = ENOTSUP; |
| if(f->f_type == DTYPE_VNODE && f->f_vnode) { |
| struct vnode *back_vn = f->f_vnode; |
| vn_lock(back_vn, LK_SHARED | LK_RETRY); |
| e = VOP_GETATTR(back_vn, vap, ap->a_cred); |
| VOP_UNLOCK(back_vn, 0); |
| } |
| if(e) { |
| struct stat st; |
| e = fo_stat(f, &st, ap->a_cred, td); |
| if(!e) { |
| vap->va_mode = st.st_mode; |
| vap->va_nlink = st.st_nlink; |
| vap->va_uid = st.st_uid; |
| vap->va_gid = st.st_gid; |
| vap->va_fsid = st.st_dev; |
| vap->va_fileid = st.st_ino; |
| vap->va_size = st.st_size; |
| vap->va_blocksize = st.st_blksize; |
| vap->va_atime = st.st_atim; |
| vap->va_mtime = st.st_mtim; |
| vap->va_ctime = st.st_ctim; |
| vap->va_birthtime = st.st_birthtim; |
| vap->va_gen = st.st_gen; |
| vap->va_flags = st.st_flags; |
| vap->va_rdev = st.st_rdev; |
| vap->va_bytes = st.st_blocks * S_BLKSIZE; |
| } |
| } |
| fdrop(f, td); |
| } |
| break; |
| |
| default: |
| panic("fdfs_getattr: fd_type = %u", VTOFDFS(vp)->fd_type); |
| } |
| |
| return 0; |
| } |
| |
| static int |
| fdfs_setattr(struct vop_setattr_args *ap) |
| { |
| struct vattr *vap = ap->a_vap; |
| struct vnode *vp; |
| struct mount *mp; |
| struct file *fp; |
| struct thread *td = curthread; |
| cap_rights_t rights; |
| unsigned fd; |
| int error; |
| |
| /* |
| * Can't mess with the root vnode |
| */ |
| if (VTOFDFS(ap->a_vp)->fd_type == Froot) return (EACCES); |
| |
| fd = VTOFDFS(ap->a_vp)->fd_fd; |
| |
| /* |
| * Allow setattr where there is an underlying vnode. |
| */ |
| error = getvnode(td, fd, |
| cap_rights_init(&rights, CAP_EXTATTR_SET), &fp); |
| if (error) { |
| /* |
| * getvnode() returns EINVAL if the file descriptor is not |
| * backed by a vnode. Silently drop all changes except |
| * chflags(2) in this case. |
| */ |
| if (error == EINVAL) { |
| if (vap->va_flags != VNOVAL) |
| error = EOPNOTSUPP; |
| else |
| error = 0; |
| } |
| return (error); |
| } |
| vp = fp->f_vnode; |
| if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) == 0) { |
| vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); |
| error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred); |
| VOP_UNLOCK(vp, 0); |
| vn_finished_write(mp); |
| } |
| fdrop(fp, td); |
| return (error); |
| } |
| |
| #define UIO_MX 16 |
| |
| static int |
| fdfs_readdir(struct vop_readdir_args *ap) |
| { |
| struct uio *uio = ap->a_uio; |
| struct filedesc *fdp; |
| struct dirent d; |
| struct dirent *dp = &d; |
| int error, i, off, fcnt; |
| |
| if (VTOFDFS(ap->a_vp)->fd_type != Froot) { |
| panic("fdfs_readdir: not fdfs root directory"); |
| } |
| |
| if (ap->a_ncookies != NULL) |
| *ap->a_ncookies = 0; |
| |
| off = (int)uio->uio_offset; |
| if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 || |
| uio->uio_resid < UIO_MX) |
| return (EINVAL); |
| i = (u_int)off / UIO_MX; |
| fdp = uio->uio_td->td_proc->p_fd; |
| error = 0; |
| |
| fcnt = i - 2; /* The first two nodes are `.' and `..' */ |
| |
| FILEDESC_SLOCK(fdp); |
| while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) { |
| bzero((caddr_t)dp, UIO_MX); |
| switch (i) { |
| case 0: /* `.' */ |
| case 1: /* `..' */ |
| dp->d_fileno = i + FD_ROOT; |
| dp->d_namlen = i + 1; |
| dp->d_reclen = UIO_MX; |
| bcopy("..", dp->d_name, dp->d_namlen); |
| dp->d_name[i + 1] = '\0'; |
| dp->d_type = DT_DIR; |
| break; |
| default: |
| if(!fdp->fd_ofiles[fcnt].fde_file) break; |
| dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); |
| dp->d_reclen = UIO_MX; |
| switch(fdfs_get_fd_type(fcnt)) { |
| case VREG: |
| dp->d_type = DT_REG; |
| break; |
| case VDIR: |
| dp->d_type = DT_DIR; |
| break; |
| case VBLK: |
| dp->d_type = DT_BLK; |
| break; |
| case VCHR: |
| dp->d_type = DT_CHR; |
| break; |
| case VLNK: |
| dp->d_type = DT_LNK; |
| break; |
| case VSOCK: |
| dp->d_type = DT_SOCK; |
| break; |
| case VFIFO: |
| dp->d_type = DT_FIFO; |
| break; |
| default: |
| dp->d_type = DT_UNKNOWN; |
| break; |
| } |
| dp->d_fileno = i + FD_DESC; |
| break; |
| } |
| if (dp->d_namlen != 0) { |
| /* |
| * And ship to userland |
| */ |
| FILEDESC_SUNLOCK(fdp); |
| error = uiomove(dp, UIO_MX, uio); |
| if (error) |
| goto done; |
| FILEDESC_SLOCK(fdp); |
| } |
| i++; |
| fcnt++; |
| } |
| FILEDESC_SUNLOCK(fdp); |
| |
| done: |
| uio->uio_offset = i * UIO_MX; |
| return (error); |
| } |
| |
| static int |
| fdfs_reclaim(struct vop_reclaim_args *ap) |
| { |
| struct vnode *vp; |
| struct fdfsnode *fd; |
| |
| vp = ap->a_vp; |
| fd = VTOFDFS(vp); |
| fdfs_remove_entry(fd); |
| free(vp->v_data, M_TEMP); |
| vp->v_data = NULL; |
| return (0); |
| } |