| /* |
| * Copyright (c) 1982, 1986, 1989, 1993 |
| * The Regents of the University of California. All rights reserved. |
| * (c) UNIX System Laboratories, Inc. |
| * All or some portions of this file are derived from material licensed |
| * to the University of California by American Telephone and Telegraph |
| * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
| * the permission of UNIX System Laboratories, Inc. |
| * Copyright 2015-2026 Rivoreo |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer |
| * in this position and unchanged. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include <sys/cdefs.h> |
| #include <sys/param.h> |
| #include <sys/systm.h> |
| #include <sys/kernel.h> |
| #include <sys/module.h> |
| #include <sys/sysctl.h> |
| #include <sys/filio.h> |
| #include <sys/namei.h> |
| #include <sys/mount.h> |
| #include <sys/vnode.h> |
| #include <sys/buf.h> |
| #include <sys/priv.h> |
| #include <sys/conf.h> |
| #if __FreeBSD_version < 1100066 |
| #include <sys/capability.h> |
| #else |
| #include <sys/capsicum.h> |
| #endif |
| |
| #include <security/mac/mac_framework.h> |
| #include <security/audit/audit.h> |
| #include <sys/jail.h> |
| |
/*
 * Compatibility shims.
 *
 * If the SDT probe macros have not been defined by this point, define them
 * as empty so the SDT_PROBE2()/SDT_PROBE3() invocations in relaxed_namei()
 * compile to nothing.
 */
#ifndef SDT_PROBE2
#define SDT_PROBE2(...)
#endif

#ifndef SDT_PROBE3
#define SDT_PROBE3(...)
#endif

/*
 * On kernels with __FreeBSD_version below 1100507, map vrefact onto VREF
 * (presumably vrefact() does not exist there -- TODO confirm).
 */
#if __FreeBSD_version < 1100507
#define vrefact VREF
#endif
| |
| #include <sys/stat.h> |
| |
| #include <ufs/ufs/quota.h> |
| #include <ufs/ufs/inode.h> |
| #include <ufs/ufs/extattr.h> |
| #include <ufs/ufs/ufsmount.h> |
| #include <ufs/ufs/ufs_extern.h> |
/*
 * Local aliases for the UFS inode helpers and type.  UFS_VTOI and ufs_inode
 * are used by print_ufs_dir_info(); UFS_DIP_SET has no user in the code
 * visible here.
 */
#define UFS_DIP_SET DIP_SET
#define UFS_VTOI VTOI
#define ufs_inode inode
| |
/*
 * NOTE: everything down to the matching #endif is compiled out.  The only
 * call sites of nameicap_check_dotdot() in this file are themselves inside
 * #if 0 blocks in relaxed_lookup().
 */
#if 0
/*
 * For dotdot lookups in capability mode, only allow the component
 * lookup to succeed if the resulting directory was already traversed
 * during the operation.  Also fail dotdot lookups for non-local
 * filesystems, where external agents might assist local lookups to
 * escape the compartment.
 */
static int
nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp, int lookup_cap_dotdot_nonlocal)
{
	/*
	 * The tracker walk below is disabled a second time; if only the
	 * outer #if 0 were lifted, this function would unconditionally
	 * return ENOTCAPABLE.
	 */
#if 0
	struct nameicap_tracker *nt;
	struct mount *mp;

	if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp == NULL ||
	    dp->v_type != VDIR)
		return (0);
	mp = dp->v_mount;
	if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL &&
	    (mp->mnt_flag & MNT_LOCAL) == 0)
		return (ENOTCAPABLE);
	TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head,
	    nm_link) {
		if (dp == nt->dp)
			return (0);
	}
#endif
	return (ENOTCAPABLE);
}
#endif
| |
| static int compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags) { |
| if (mp == NULL || ((lkflags & LK_SHARED) && |
| (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) || |
| ((cnflags & ISDOTDOT) && |
| (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) { |
| lkflags &= ~LK_SHARED; |
| lkflags |= LK_EXCLUSIVE; |
| } |
| lkflags |= LK_NODDLKTREAT; |
| return (lkflags); |
| } |
| |
| static int needs_exclusive_leaf(struct mount *mp, int flags) { |
| /* |
| * Intermediate nodes can use shared locks, we only need to |
| * force an exclusive lock for leaf nodes. |
| */ |
| if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF)) |
| return (0); |
| |
| /* Always use exclusive locks if LOCKSHARED isn't set. */ |
| if (!(flags & LOCKSHARED)) |
| return (1); |
| |
| /* |
| * For lookups during open(), if the mount point supports |
| * extended shared operations, then use a shared lock for the |
| * leaf node, otherwise use an exclusive lock. |
| */ |
| if ((flags & ISOPEN) != 0) |
| return (!MNT_EXTENDED_SHARED(mp)); |
| |
| /* |
| * Lookup requests outside of open() that specify LOCKSHARED |
| * only need a shared lock on the leaf vnode. |
| */ |
| return (0); |
| } |
| |
/*
 * NOTE: compiled out.  The calls to nameicap_cleanup() and
 * nameicap_tracker_add() elsewhere in this file are commented out as well.
 * nt_zone is not defined anywhere in the code visible here, so this block
 * would need more than removing the #if 0 to build.
 */
#if 0
/* One directory visited during a capability-mode lookup. */
struct nameicap_tracker {
	struct vnode *dp;
	TAILQ_ENTRY(nameicap_tracker) nm_link;
};

/*
 * Empty ndp->ni_cap_tracker: unlink every entry, vdrop() the vnode it
 * recorded and return the entry to nt_zone.
 */
static void nameicap_cleanup(struct nameidata *ndp)
{
	struct nameicap_tracker *nt, *nt1;

	KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) ||
	    (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative"));
	TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) {
		TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link);
		vdrop(nt->dp);
		uma_zfree(nt_zone, nt);
	}
}
#endif
| |
| static void namei_cleanup_cnp(struct componentname *cnp) { |
| uma_zfree(namei_zone, cnp->cn_pnbuf); |
| } |
| |
| static int namei_handle_root(struct nameidata *ndp, struct vnode **dpp) { |
| struct componentname *cnp = &ndp->ni_cnd; |
| #ifdef NI_LCF_STRICTRELATIVE |
| if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) { |
| #ifdef KTRACE |
| if (KTRPOINT(curthread, KTR_CAPFAIL)) |
| ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); |
| #endif |
| printf("vfsroot debug: namei_handle_root: ENOTCAPABLE\n"); |
| return (ENOTCAPABLE); |
| } |
| #endif |
| while (*(cnp->cn_nameptr) == '/') { |
| cnp->cn_nameptr++; |
| ndp->ni_pathlen--; |
| } |
| *dpp = ndp->ni_rootdir; |
| vrefact(*dpp); |
| return (0); |
| } |
| |
/*
 * Resolve the pathname held in ndp->ni_cnd, one component at a time,
 * starting at the referenced directory vnode ndp->ni_startdir (which is
 * consumed and reset to NULLVP).
 *
 * ndp           - lookup state; on success ni_vp (and, depending on the
 *                 flags, ni_dvp) describe the result.  ni_vp is NULL when
 *                 VOP_LOOKUP() answered EJUSTRETURN (entry may be created).
 * lookup_shared - non-zero to begin the walk with shared vnode locks.
 *
 * Returns 0 on success or an errno value; on error ni_vp is set to NULL.
 * When the result is a symlink that must be followed, ISSYMLINK is set in
 * cn_flags and 0 is returned so that the caller can splice in the target.
 *
 * Several checks are deliberately compiled out with #if 0 in this copy:
 *  - rejecting "." or ".." as the last component of DELETE/RENAME,
 *  - the capability-mode directory tracker (nameicap_*),
 *  - crossing into a filesystem mounted on the looked-up directory.
 * NOTE(review): this looks like a relaxed copy of the stock lookup() in
 * vfs_lookup.c -- confirm against the kernel version being targeted.
 */
static int relaxed_lookup(struct nameidata *ndp, int lookup_shared) {
	char *cp;			/* pointer into pathname argument */
	char *prev_ni_next;		/* saved ndp->ni_next */
	struct vnode *dp = NULL;	/* the directory we are searching */
	struct vnode *tdp;		/* saved dp */
	struct mount *mp;		/* mount table entry */
	struct prison *pr;
	size_t prev_ni_pathlen;		/* saved ndp->ni_pathlen */
	int docache;			/* == 0 do not cache last component */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int rdonly;			/* lookup read-only flag bit */
	int error = 0;
	int dpunlocked = 0;		/* dp has already been unlocked */
	int relookup = 0;		/* do not consume the path component */
	struct componentname *cnp = &ndp->ni_cnd;
	int lkflags_save;
	int ni_dvp_unlocked;
	int lookup_cap_dotdot_nonlocal = 1; // TODO: get from sysctl
	/*
	 * Setup: break out flag bits into variables.
	 */
	/*
	 * ni_dvp_unlocked: 0 = ni_dvp still locked, 1 = unlocked but still
	 * referenced, 2 = already fully released (see the bad2 label).
	 */
	ni_dvp_unlocked = 0;
	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
	KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
	    ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
	if (cnp->cn_nameiop == DELETE ||
	    (wantparent && cnp->cn_nameiop != CREATE &&
	     cnp->cn_nameiop != LOOKUP)) {
		docache = 0;
	}
	rdonly = cnp->cn_flags & RDONLY;
	cnp->cn_flags &= ~ISSYMLINK;
	ndp->ni_dvp = NULL;
	/*
	 * We use shared locks until we hit the parent of the last cn then
	 * we adjust based on the requesting flags.
	 */
	cnp->cn_lkflags = lookup_shared ? LK_SHARED : LK_EXCLUSIVE;
	dp = ndp->ni_startdir;
	ndp->ni_startdir = NULLVP;
	vn_lock(dp,
	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
	    cnp->cn_flags));

dirloop:
	/*
	 * Search a new directory.
	 *
	 * The last component of the filename is left accessible via
	 * cnp->cn_nameptr for callers that need the name. Callers needing
	 * the name set the SAVENAME flag. When done, they assume
	 * responsibility for freeing the pathname buffer.
	 */
	for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++)
		continue;
	cnp->cn_namelen = cp - cnp->cn_nameptr;
	if (cnp->cn_namelen > NAME_MAX) {
		error = ENAMETOOLONG;
		goto bad;
	}
	/* Remembered so that an ERELOOKUP retry can rewind to this component. */
	prev_ni_pathlen = ndp->ni_pathlen;
	ndp->ni_pathlen -= cnp->cn_namelen;
	KASSERT(ndp->ni_pathlen <= PATH_MAX,
	    ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen));
	prev_ni_next = ndp->ni_next;
	ndp->ni_next = cp;

	/*
	 * Replace multiple slashes by a single slash and trailing slashes
	 * by a null.  This must be done before VOP_LOOKUP() because some
	 * fs's don't know about trailing slashes.  Remember if there were
	 * trailing slashes to handle symlinks, existing non-directories
	 * and non-existing files that won't be directories specially later.
	 */
	while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
		cp++;
		ndp->ni_pathlen--;
		if (*cp == '\0') {
			*ndp->ni_next = '\0';
			cnp->cn_flags |= TRAILINGSLASH;
		}
	}
	ndp->ni_next = cp;

	cnp->cn_flags |= MAKEENTRY;
	if (*cp == '\0' && docache == 0)
		cnp->cn_flags &= ~MAKEENTRY;
	if (cnp->cn_namelen == 2 &&
	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') {
		cnp->cn_flags |= ISDOTDOT;
	} else {
		cnp->cn_flags &= ~ISDOTDOT;
	}
	if (*ndp->ni_next == 0) {
		cnp->cn_flags |= ISLASTCN;
	} else {
		cnp->cn_flags &= ~ISLASTCN;
	}

	/* Disabled: refusing "." as the final component of DELETE/RENAME. */
#if 0
	if ((cnp->cn_flags & ISLASTCN) != 0 &&
	    cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EINVAL;
		goto bad;
	}
#endif

	//nameicap_tracker_add(ndp, dp);

	/*
	 * Check for degenerate name (e.g. / or "")
	 * which is a way of talking about a directory,
	 * e.g. like "/." or ".".
	 */
	if (cnp->cn_nameptr[0] == '\0') {
		if (dp->v_type != VDIR) {
			error = ENOTDIR;
			goto bad;
		}
		if (cnp->cn_nameiop != LOOKUP) {
			error = EISDIR;
			goto bad;
		}
		if (wantparent) {
			ndp->ni_dvp = dp;
			VREF(dp);
		}
		ndp->ni_vp = dp;

		if (cnp->cn_flags & AUDITVNODE1)
			AUDIT_ARG_VNODE1(dp);
		else if (cnp->cn_flags & AUDITVNODE2)
			AUDIT_ARG_VNODE2(dp);

		if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
			VOP_UNLOCK(dp, 0);
		/* XXX This should probably move to the top of function. */
		if (cnp->cn_flags & SAVESTART)
			panic("lookup: SAVESTART");
		goto success;
	}

	/*
	 * Handle "..": five special cases.
	 * 0. If doing a capability lookup and lookup_cap_dotdot is
	 *    disabled, return ENOTCAPABLE.
	 * 1. Return an error if this is the last component of
	 *    the name and the operation is DELETE or RENAME.
	 * 2. If at root directory (e.g. after chroot)
	 *    or at absolute root directory
	 *    then ignore it so can't get out.
	 * 3. If this vnode is the root of a mounted
	 *    filesystem, then replace it with the
	 *    vnode which was mounted on so we take the
	 *    .. in the other filesystem.
	 * 4. If the vnode is the top directory of
	 *    the jail or chroot, don't let them out.
	 * 5. If doing a capability lookup and lookup_cap_dotdot is
	 *    enabled, return ENOTCAPABLE if the lookup would escape
	 *    from the initial file descriptor directory.  Checks are
	 *    done by ensuring that namei() already traversed the
	 *    result of dotdot lookup.
	 *
	 * In this copy, case 1 and the tracker check of case 5 are
	 * compiled out (#if 0) below.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
#ifdef NI_LCF_STRICTRELATIVE
		if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT))
		    == NI_LCF_STRICTRELATIVE) {
#ifdef KTRACE
			if (KTRPOINT(curthread, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
#endif
			error = ENOTCAPABLE;
			printf("vfsroot debug: lookup: ENOTCAPABLE\n");
			goto bad;
		}
#endif
#if 0
		if ((cnp->cn_flags & ISLASTCN) != 0 &&
		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
			error = EINVAL;
			goto bad;
		}
#endif
		for (;;) {
			/*
			 * pr ends up non-NULL iff dp is the root of the
			 * credential's prison or of one of its ancestors.
			 */
			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
			     pr = pr->pr_parent) {
				if (dp == pr->pr_root) break;
			}
			if (dp == ndp->ni_rootdir ||
			    dp == ndp->ni_topdir ||
			    dp == rootvnode ||
			    pr != NULL ||
			    ((dp->v_vflag & VV_ROOT) != 0 &&
			     (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
				/* ".." at a boundary resolves to dp itself. */
				ndp->ni_dvp = dp;
				ndp->ni_vp = dp;
				VREF(dp);
				goto nextname;
			}
			if ((dp->v_vflag & VV_ROOT) == 0)
				break;
			if (dp->v_iflag & VI_DOOMED) {	/* forced unmount */
				error = ENOENT;
				goto bad;
			}
			/* Step from a filesystem root down to the vnode it covers. */
			tdp = dp;
			dp = dp->v_mount->mnt_vnodecovered;
			VREF(dp);
			vput(tdp);
			vn_lock(dp,
			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
			    LK_RETRY, ISDOTDOT));
#if 0
			error = nameicap_check_dotdot(ndp, dp, lookup_cap_dotdot_nonlocal);
			if (error != 0) {
				printf("vfsunlink debug: %s:%u: error = %u\n", __FILE__, __LINE__, error);
#ifdef KTRACE
				if (KTRPOINT(curthread, KTR_CAPFAIL))
					ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
#endif
				goto bad;
			}
#endif
		}
	}

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */
unionlookup:
#ifdef MAC
	if ((cnp->cn_flags & NOMACCHECK) == 0) {
		error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp,
		    cnp);
		if (error)
			goto bad;
	}
#endif
	ndp->ni_dvp = dp;
	ndp->ni_vp = NULL;
	ASSERT_VOP_LOCKED(dp, "lookup");
	/*
	 * If we have a shared lock we may need to upgrade the lock for the
	 * last operation.
	 */
	if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) &&
	    VOP_ISLOCKED(dp) == LK_SHARED) {
		vn_lock(dp, LK_UPGRADE|LK_RETRY);
	}
	if ((dp->v_iflag & VI_DOOMED) != 0) {
		error = ENOENT;
		goto bad;
	}
	/*
	 * If we're looking up the last component and we need an exclusive
	 * lock, adjust our lkflags.
	 */
	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) {
		cnp->cn_lkflags = LK_EXCLUSIVE;
	}
#ifdef NAMEI_DIAGNOSTIC
	vn_printf(dp, "lookup in ");
#endif
	/*
	 * cn_lkflags is adjusted only for the duration of VOP_LOOKUP() and
	 * restored right after it returns.
	 */
	lkflags_save = cnp->cn_lkflags;
	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
	    cnp->cn_flags);
	error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp);
	cnp->cn_lkflags = lkflags_save;
	if (error != 0) {
		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
#ifdef NAMEI_DIAGNOSTIC
		printf("not found\n");
#endif
		/*
		 * Not found in the root of a union mount: retry the same
		 * component in the covered directory.
		 */
		if ((error == ENOENT) &&
		    (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
		    (dp->v_mount->mnt_flag & MNT_UNION)) {
			tdp = dp;
			dp = dp->v_mount->mnt_vnodecovered;
			VREF(dp);
			vput(tdp);
			vn_lock(dp,
			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
			    LK_RETRY, cnp->cn_flags));
			//nameicap_tracker_add(ndp, dp);
			goto unionlookup;
		}

#ifdef ERELOOKUP
		/*
		 * The filesystem asked for the component to be looked up
		 * again: pretend dp was the result and let nextname rewind.
		 */
		if (error == ERELOOKUP) {
			vref(dp);
			ndp->ni_vp = dp;
			error = 0;
			relookup = 1;
			goto good;
		}
#endif

		if (error != EJUSTRETURN)
			goto bad;
		/*
		 * At this point, we know we're at the end of the
		 * pathname.  If creating / renaming, we can consider
		 * allowing the file or directory to be created / renamed,
		 * provided we're not on a read-only filesystem.
		 */
		if (rdonly) {
			error = EROFS;
			goto bad;
		}
		/* trailing slash only allowed for directories */
		if ((cnp->cn_flags & TRAILINGSLASH) &&
		    !(cnp->cn_flags & WILLBEDIR)) {
			error = ENOENT;
			goto bad;
		}
		if ((cnp->cn_flags & LOCKPARENT) == 0)
			VOP_UNLOCK(dp, 0);
		/*
		 * We return with ni_vp NULL to indicate that the entry
		 * doesn't currently exist, leaving a pointer to the
		 * (possibly locked) directory vnode in ndp->ni_dvp.
		 */
		if (cnp->cn_flags & SAVESTART) {
			ndp->ni_startdir = ndp->ni_dvp;
			VREF(ndp->ni_startdir);
		}
		goto success;
	}

good:
#ifdef NAMEI_DIAGNOSTIC
	printf("found\n");
#endif
	dp = ndp->ni_vp;

	/*
	 * Disabled: mount point crossing.  With this block compiled out the
	 * walk continues in the directory that was looked up even if a
	 * filesystem is mounted on it.
	 */
#if 0
	/*
	 * Check to see if the vnode has been mounted on;
	 * if so find the root of the mounted filesystem.
	 */
	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
		if (vfs_busy(mp, 0))
			continue;
		vput(dp);
		if (dp != ndp->ni_dvp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		vrefact(vp_crossmp);
		ndp->ni_dvp = vp_crossmp;
		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
		    cnp->cn_flags), &tdp);
		vfs_unbusy(mp);
		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
			panic("vp_crossmp exclusively locked or reclaimed");
		if (error) {
			dpunlocked = 1;
			goto bad2;
		}
		ndp->ni_vp = dp = tdp;
	}
#endif

	/*
	 * Check for symbolic link
	 */
	if ((dp->v_type == VLNK) &&
	    ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
	     *ndp->ni_next == '/')) {
		cnp->cn_flags |= ISSYMLINK;
		if (dp->v_iflag & VI_DOOMED) {
			/*
			 * We can't know whether the directory was mounted with
			 * NOSYMFOLLOW, so we can't follow safely.
			 */
			error = ENOENT;
			goto bad2;
		}
		if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
			error = EACCES;
			goto bad2;
		}
		/*
		 * Symlink code always expects an unlocked dvp.
		 */
		if (ndp->ni_dvp != ndp->ni_vp) {
			VOP_UNLOCK(ndp->ni_dvp, 0);
			ni_dvp_unlocked = 1;
		}
		goto success;
	}

nextname:
	/*
	 * Not a symbolic link that we will follow.  Continue with the
	 * next component if there is any; otherwise, we're done.
	 */
	KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
	    ("lookup: invalid path state."));
	if (relookup) {
		/* Rewind to the start of the component and try it again. */
		relookup = 0;
		ndp->ni_pathlen = prev_ni_pathlen;
		ndp->ni_next = prev_ni_next;
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		goto dirloop;
	}
#if 0
	if (cnp->cn_flags & ISDOTDOT) {
		error = nameicap_check_dotdot(ndp, ndp->ni_vp, lookup_cap_dotdot_nonlocal);
		if (error != 0) {
			printf("vfsunlink debug: %s:%u: error = %u\n", __FILE__, __LINE__, error);
#ifdef KTRACE
			if (KTRPOINT(curthread, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
#endif
			goto bad2;
		}
	}
#endif
	if (*ndp->ni_next == '/') {
		/* More components follow: skip the slashes and descend. */
		cnp->cn_nameptr = ndp->ni_next;
		while (*cnp->cn_nameptr == '/') {
			cnp->cn_nameptr++;
			ndp->ni_pathlen--;
		}
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
		goto dirloop;
	}
	/*
	 * If we're processing a path with a trailing slash,
	 * check that the end result is a directory.
	 */
	if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad2;
	}
	/*
	 * Disallow directory write attempts on read-only filesystems.
	 */
	if (rdonly &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto bad2;
	}
	if (cnp->cn_flags & SAVESTART) {
		ndp->ni_startdir = ndp->ni_dvp;
		VREF(ndp->ni_startdir);
	}
	if (!wantparent) {
		/* Caller does not want the parent: release it completely. */
		ni_dvp_unlocked = 2;
		if (ndp->ni_dvp != dp)
			vput(ndp->ni_dvp);
		else
			vrele(ndp->ni_dvp);
	} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) {
		/* WANTPARENT only: keep the reference, drop the lock. */
		VOP_UNLOCK(ndp->ni_dvp, 0);
		ni_dvp_unlocked = 1;
	}

	if (cnp->cn_flags & AUDITVNODE1)
		AUDIT_ARG_VNODE1(dp);
	else if (cnp->cn_flags & AUDITVNODE2)
		AUDIT_ARG_VNODE2(dp);

	if ((cnp->cn_flags & LOCKLEAF) == 0)
		VOP_UNLOCK(dp, 0);
success:
	/*
	 * Because of lookup_shared we may have the vnode shared locked, but
	 * the caller may want it to be exclusively locked.
	 */
	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
	    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
		vn_lock(dp, LK_UPGRADE | LK_RETRY);
		if (dp->v_iflag & VI_DOOMED) {
			error = ENOENT;
			goto bad2;
		}
	}
	return (0);

	/*
	 * bad2: both ni_dvp and dp need releasing.  ni_dvp is vput() when
	 * it is a distinct, still-locked vnode, vrele()d when it is only
	 * referenced, and skipped when it was already released.
	 */
bad2:
	if (ni_dvp_unlocked != 2) {
		if (dp != ndp->ni_dvp && !ni_dvp_unlocked) {
			vput(ndp->ni_dvp);
		} else {
			vrele(ndp->ni_dvp);
		}
	}
	/* bad: only dp is held. */
bad:
	if (!dpunlocked)
		vput(dp);
	ndp->ni_vp = NULL;
	return (error);
}
| |
/*
 * Translate the pathname described by `ndp' into a vnode, using
 * relaxed_lookup() for the component walk.
 *
 * The path is copied from ndp->ni_dirp into a namei_zone buffer, the
 * starting directory is chosen (root for an absolute path, otherwise
 * ni_startdir, the current directory for AT_FDCWD, or the directory behind
 * ni_dirfd), and relaxed_lookup() is called in a loop, splicing in the
 * target of every symlink that has to be followed, up to MAXSYMLINKS.
 *
 * Returns 0 on success or an errno value.  On success the pathname buffer
 * is freed unless SAVENAME or SAVESTART was requested, in which case it is
 * kept and HASBUF is set in cn_flags.  On every error path below the
 * buffer is freed.
 */
static int relaxed_namei(struct nameidata *ndp) {
	struct filedesc *fdp;	/* pointer to file descriptor state */
	char *cp;		/* pointer into pathname argument */
	struct vnode *dp;	/* the directory we are searching */
	struct iovec aiov;	/* uio for reading symbolic links */
	struct componentname *cnp;
	struct thread *td;
	struct proc *p;
	cap_rights_t rights;
	struct uio auio;
	int error, linklen, startdir_used;

	int lookup_shared = 1; // TODO: get from sysctl
	int lookup_cap_dotdot = 1; // TODO: get from sysctl

	cnp = &ndp->ni_cnd;
	td = cnp->cn_thread;
	p = td->td_proc;
	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred;
	KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc"));
	KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0,
	    ("namei: nameiop contaminated with flags"));
	KASSERT((cnp->cn_flags & OPMASK) == 0,
	    ("namei: flags contaminated with nameiops"));
	MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
	    ndp->ni_startdir->v_type == VBAD);
	if (!lookup_shared)
		cnp->cn_flags &= ~LOCKSHARED;
	fdp = p->p_fd;
#ifdef NI_LCF_STRICTRELATIVE
	TAILQ_INIT(&ndp->ni_cap_tracker);
	ndp->ni_lcf = 0;
#endif

	/* We will set this ourselves if we need it. */
	cnp->cn_flags &= ~TRAILINGSLASH;

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 */
	if ((cnp->cn_flags & HASBUF) == 0) {
		cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	}
	if (ndp->ni_segflg == UIO_SYSSPACE) {
		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN,
		    &ndp->ni_pathlen);
	} else {
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN,
		    &ndp->ni_pathlen);
	}

	/*
	 * Don't allow empty pathnames.
	 */
	if (error == 0 && *cnp->cn_pnbuf == '\0')
		error = ENOENT;

#ifdef CAPABILITY_MODE
	/*
	 * In capability mode, lookups must be restricted to happen in
	 * the subtree with the root specified by the file descriptor:
	 * - The root must be real file descriptor, not the pseudo-descriptor
	 *   AT_FDCWD.
	 * - The passed path must be relative and not absolute.
	 * - If lookup_cap_dotdot is disabled, path must not contain the
	 *   '..' components.
	 * - If lookup_cap_dotdot is enabled, we verify that all '..'
	 *   components lookups result in the directories which were
	 *   previously walked by us, which prevents an escape from
	 *   the relative root.
	 */
	if (error == 0 && IN_CAPABILITY_MODE(td) &&
	    (cnp->cn_flags & NOCAPCHECK) == 0) {
#ifdef NI_LCF_STRICTRELATIVE
		ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
#endif
		if (ndp->ni_dirfd == AT_FDCWD) {
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL)) {
				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
			}
#endif
			error = ECAPMODE;
		}
	}
#endif
	if (error != 0) {
		namei_cleanup_cnp(cnp);
		ndp->ni_vp = NULL;
		return (error);
	}
	ndp->ni_loopcnt = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_NAMEI)) {
		KASSERT(cnp->cn_thread == curthread,
		    ("namei not using curthread"));
		ktrnamei(cnp->cn_pnbuf);
	}
#endif
	/*
	 * Get starting point for the translation.
	 */
	/*
	 * The file descriptor table stays share-locked until the starting
	 * directory has been chosen and referenced.
	 */
	FILEDESC_SLOCK(fdp);
	ndp->ni_rootdir = fdp->fd_rdir;
	vrefact(ndp->ni_rootdir);
	ndp->ni_topdir = fdp->fd_jdir;

	/*
	 * If we are auditing the kernel pathname, save the user pathname.
	 */
	if (cnp->cn_flags & AUDITVNODE1) {
		AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf);
	}
	if (cnp->cn_flags & AUDITVNODE2) {
		AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf);
	}

	startdir_used = 0;
	dp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	if (cnp->cn_pnbuf[0] == '/') {
		error = namei_handle_root(ndp, &dp);
	} else {
		if (ndp->ni_startdir != NULL) {
			/* The caller's reference on ni_startdir is taken over. */
			dp = ndp->ni_startdir;
			startdir_used = 1;
		} else if (ndp->ni_dirfd == AT_FDCWD) {
			dp = fdp->fd_cdir;
			vrefact(dp);
		} else {
			rights = ndp->ni_rightsneeded;
			cap_rights_set(&rights, CAP_LOOKUP);

			if (cnp->cn_flags & AUDITVNODE1)
				AUDIT_ARG_ATFD1(ndp->ni_dirfd);
			if (cnp->cn_flags & AUDITVNODE2)
				AUDIT_ARG_ATFD2(ndp->ni_dirfd);
			error = fgetvp_rights(td, ndp->ni_dirfd,
			    &rights, &ndp->ni_filecaps, &dp);
			if (error == EINVAL)
				error = ENOTDIR;
#if defined NI_LCF_STRICTRELATIVE && defined CAPABILITIES
			/*
			 * If file descriptor doesn't have all rights,
			 * all lookups relative to it must also be
			 * strictly relative.
			 */
			CAP_ALL(&rights);
			if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights,
			    &rights) ||
			    ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
			    ndp->ni_filecaps.fc_nioctls != -1) {
				ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
			}
#endif
		}
		if (error == 0 && dp->v_type != VDIR)
			error = ENOTDIR;
	}
	FILEDESC_SUNLOCK(fdp);
	/* A supplied ni_startdir that an absolute path bypassed is dropped. */
	if (ndp->ni_startdir != NULL && !startdir_used) {
		vrele(ndp->ni_startdir);
	}
	if (error != 0) {
		if (dp != NULL)
			vrele(dp);
		goto out;
	}
#ifdef NI_LCF_STRICTRELATIVE
	if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 &&
	    lookup_cap_dotdot != 0) {
		ndp->ni_lcf |= NI_LCF_CAP_DOTDOT;
	}
#endif
	SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf,
	    cnp->cn_flags);
	/* One iteration per relaxed_lookup() call, i.e. per followed symlink. */
	for (;;) {
		ndp->ni_startdir = dp;
		error = relaxed_lookup(ndp, lookup_shared);
		if (error != 0)
			goto out;
		/*
		 * If not a symbolic link, we're done.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			vrele(ndp->ni_rootdir);
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
				namei_cleanup_cnp(cnp);
			} else {
				cnp->cn_flags |= HASBUF;
			}
			//nameicap_cleanup(ndp);
			SDT_PROBE2(vfs, namei, lookup, return, 0, ndp->ni_vp);
			return (0);
		}
		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			break;
		}
#ifdef MAC
		if ((cnp->cn_flags & NOMACCHECK) == 0) {
			error = mac_vnode_check_readlink(td->td_ucred,
			    ndp->ni_vp);
			if (error != 0)
				break;
		}
#endif
		/*
		 * If path components remain after the symlink, the link is
		 * read into a fresh buffer so the remainder can be appended;
		 * otherwise the existing buffer is reused in place.
		 */
		if (ndp->ni_pathlen > 1) {
			cp = uma_zalloc(namei_zone, M_WAITOK);
		} else {
			cp = cnp->cn_pnbuf;
		}
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = td;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error != 0) {
			if (ndp->ni_pathlen > 1) {
				uma_zfree(namei_zone, cp);
			}
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			if (ndp->ni_pathlen > 1) {
				uma_zfree(namei_zone, cp);
			}
			error = ENOENT;
			break;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
			error = ENAMETOOLONG;
			break;
		}
		if (ndp->ni_pathlen > 1) {
			/* Append the unparsed remainder and switch buffers. */
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else {
			cnp->cn_pnbuf[linklen] = '\0';
		}
		ndp->ni_pathlen += linklen;
		vput(ndp->ni_vp);
		/* The link's parent directory becomes the next start point. */
		dp = ndp->ni_dvp;
		/*
		 * Check if root directory should replace current directory.
		 */
		cnp->cn_nameptr = cnp->cn_pnbuf;
		if (*(cnp->cn_nameptr) == '/') {
			vrele(dp);
			error = namei_handle_root(ndp, &dp);
			if (error != 0)
				goto out;
		}
	}
	/*
	 * Reached only via `break' after a successful relaxed_lookup() that
	 * returned a symlink: release the link vnode and its parent.
	 */
	vput(ndp->ni_vp);
	ndp->ni_vp = NULL;
	vrele(ndp->ni_dvp);
out:
	vrele(ndp->ni_rootdir);
	namei_cleanup_cnp(cnp);
	//nameicap_cleanup(ndp);
	SDT_PROBE2(vfs, namei, lookup, return, error, NULL);
	return (error);
}
| |
| static void print_ufs_dir_info(struct vnode *dv) { |
| const struct ufs_inode *di = UFS_VTOI(dv); |
| printf("i_number=%u, i_flag=0x%x, i_effnlink=%d, i_count=%d, i_endoff=%d, i_diroff=%d, i_offset=%d, i_nextclustercg=%d, i_size=%llu, i_gen=%llu\n", |
| (unsigned int)di->i_number, (unsigned int)di->i_flag, di->i_effnlink, |
| (int)di->i_count, (int)di->i_endoff, (int)di->i_diroff, (int)di->i_offset, |
| di->i_nextclustercg, (unsigned long long int)di->i_size, (unsigned long long int)di->i_gen); |
| } |
| |
| static int handle_unlink(SYSCTL_HANDLER_ARGS) { |
| if(req->oldptr) return ENOTSUP; |
| if(!req->newptr) return 0; |
| arg2 = req->newlen - req->newidx; |
| //uprintf("arg2 = %ld\n", (long int)arg2); |
| char path[arg2+1]; |
| int e = SYSCTL_IN(req, path, arg2); |
| path[arg2] = 0; |
| if(e) return e; |
| //uprintf("path = \"%s\"\n", path); |
| |
| struct thread *td = curthread; |
| struct mount *mp; |
| struct nameidata nd; |
| cap_rights_t rights; |
| restart: |
| bwillwrite(); |
| #if 1 |
| NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, |
| path, AT_FDCWD, cap_rights_init(&rights, CAP_UNLINKAT), td); |
| #else |
| NDINIT_ATRIGHTS(&nd, 0, LOCKPARENT | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, |
| path, AT_FDCWD, cap_rights_init(&rights, CAP_UNLINKAT), td); |
| #endif |
| e = relaxed_namei(&nd); |
| //uprintf("e = %d\n", e); |
| if(e) return e == EINVAL ? EPERM : e; |
| struct vnode *vp = nd.ni_vp; |
| if(vp->v_type == VDIR) e = priv_check(td, PRIV_VFS_ADMIN); |
| if(!e) { |
| e = vn_start_write(nd.ni_dvp, &mp, V_NOWAIT); |
| if(e) { |
| NDFREE(&nd, NDF_ONLY_PNBUF); |
| vput(nd.ni_dvp); |
| if(vp == nd.ni_dvp) vrele(vp); |
| else vput(vp); |
| e = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); |
| if(e) return e; |
| goto restart; |
| } |
| #if 1 |
| if(strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN) == 0) { |
| print_ufs_dir_info(nd.ni_dvp); |
| struct vnode *v; |
| ino_t ino; |
| e = ufs_lookup_ino(nd.ni_dvp, &v, &nd.ni_cnd, &ino); |
| #if 1 |
| if(e) printf("ufs_lookup_ino failed: error %d\n", e); |
| else printf("ino = %u\n", (unsigned int)ino); |
| #else |
| if(e) { |
| printf("ufs_lookup_ino failed: error %d\n", e); |
| goto cleanup; |
| } |
| #endif |
| print_ufs_dir_info(nd.ni_dvp); |
| } |
| #endif |
| #ifdef MAC |
| e = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); |
| if(e) goto skip; |
| #endif |
| vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); |
| e = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); |
| #ifdef MAC |
| skip: |
| #endif |
| vn_finished_write(mp); |
| } |
| cleanup: |
| NDFREE(&nd, NDF_ONLY_PNBUF); |
| vput(nd.ni_dvp); |
| if(vp == nd.ni_dvp) vrele(vp); |
| else vput(vp); |
| return e; |
| } |
| |
/*
 * vfs.unlink: write-only string node; writing a path to it invokes
 * handle_unlink() on that path.
 */
SYSCTL_OID(_vfs, OID_AUTO, unlink, CTLTYPE_STRING | CTLFLAG_WR, NULL, 0, handle_unlink, "A", "Unlink by path");
| |
| static int vfsunlink_module_event(module_t mod, int type, void *data) { |
| switch(type) { |
| case MOD_LOAD: |
| break; |
| case MOD_UNLOAD: |
| break; |
| case MOD_SHUTDOWN: |
| break; |
| default: |
| return EOPNOTSUPP; |
| } |
| return 0; |
| } |
| |
/*
 * Module glue: register "vfsunlink" with vfsunlink_module_event() as its
 * event handler (no private data), ordered at SI_SUB_VFS / SI_ORDER_ANY,
 * and declare module version 1.
 */
static moduledata_t module_data = { "vfsunlink", vfsunlink_module_event, NULL };
DECLARE_MODULE(vfsunlink, module_data, SI_SUB_VFS, SI_ORDER_ANY);
MODULE_VERSION(vfsunlink, 1);