diff --git a/os/shd/mount.h b/os/shd/mount.h new file mode 100644 index 0000000000000000000000000000000000000000..4fa3dc8f508d0ba2699f8593c5dfa7dca6e40ad3 --- /dev/null +++ b/os/shd/mount.h @@ -0,0 +1,498 @@ +/* + * Copyright (c) 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mount.h 8.21 (Berkeley) 5/20/95 + * $FreeBSD: src/sys/sys/mount.h,v 1.89.2.6 2002/04/26 00:46:00 iedowse Exp $ + */ + +#ifndef _SYS_MOUNT_H_ +#define _SYS_MOUNT_H_ + +#include + +#ifndef _KERNEL +#if !defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE) +#include +#endif /* !_POSIX_C_SOURCE */ +#endif /* !_KERNEL */ + +#include +#ifdef _KERNEL +#include +#endif + +typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ + +/* + * File identifier. + * These are unique per filesystem on a single machine. + */ +#define MAXFIDSZ 16 + +struct fid { + u_short fid_len; /* length of data in bytes */ + u_short fid_reserved; /* force longword alignment */ + char fid_data[MAXFIDSZ]; /* data (variable length) */ +}; + +/* + * file system statistics + */ + +#define MFSNAMELEN 16 /* length of fs type name, including null */ +#ifdef __i386__ +#define MNAMELEN 80 /* length of buffer for returned name */ +#endif +#ifdef __alpha__ +#define MNAMELEN 72 /* length of buffer for returned name */ +#endif + +struct statfs { + long f_spare2; /* placeholder */ + long f_bsize; /* fundamental file system block size */ + long f_iosize; /* optimal transfer block size */ + long f_blocks; /* total data blocks in file system */ + long f_bfree; /* free blocks in fs */ + long f_bavail; /* free blocks avail to non-superuser */ + long f_files; /* total file nodes in file system */ + long f_ffree; /* free file nodes in fs */ + fsid_t f_fsid; /* file system id */ + uid_t f_owner; /* user that mounted the filesystem */ + int f_type; /* type of filesystem */ + int f_flags; /* copy of mount exported flags */ + long f_syncwrites; /* count of sync writes since mount */ + long f_asyncwrites; /* count of async writes since mount */ + char f_fstypename[MFSNAMELEN]; /* fs type name */ + char f_mntonname[MNAMELEN]; /* directory on which mounted */ + long f_syncreads; /* count of sync reads since mount */ + long f_asyncreads; /* count of async reads since mount */ + short f_spares1; /* unused spare */ + char f_mntfromname[MNAMELEN];/* mounted filesystem */ + short f_spares2; /* unused spare */ + long f_spare[2]; /* unused spare */ +}; + +#ifdef _KERNEL +/* + * Structure per mounted file system. Each mounted file system has an + * array of operations and an instance record. The file systems are + * put on a doubly linked list. + * + * NOTE: mnt_nvnodelist and mnt_reservedvnlist. At the moment vnodes + * are linked into mnt_nvnodelist. At some point in the near future the + * vnode list will be split into a 'dirty' and 'clean' list. mnt_nvnodelist + * will become the dirty list and mnt_reservedvnlist will become the 'clean' + * list. Filesystem kld's syncing code should remain compatible since + * they only need to scan the dirty vnode list (nvnodelist -> dirtyvnodelist). + */ +TAILQ_HEAD(vnodelst, vnode); + +struct mount { + TAILQ_ENTRY(mount) mnt_list; /* mount list */ + struct vfsops *mnt_op; /* operations on fs */ + struct vfsconf *mnt_vfc; /* configuration info */ + struct vnode *mnt_vnodecovered; /* vnode we mounted on */ + struct vnode *mnt_syncer; /* syncer vnode */ + struct vnodelst mnt_nvnodelist; /* list of vnodes this mount */ + struct lock mnt_lock; /* mount structure lock */ + int mnt_flag; /* flags shared with user */ + int mnt_kern_flag; /* kernel only flags */ + int mnt_maxsymlinklen; /* max size of short symlink */ + struct statfs mnt_stat; /* cache of filesystem stats */ + qaddr_t mnt_data; /* private data */ + time_t mnt_time; /* last time written*/ + u_int mnt_iosize_max; /* max IO request size */ + struct vnodelst mnt_reservedvnlist; /* (future) dirty vnode list */ + void *mnt_prison; /* associated prison */ + int mnt_ops_in_progress; +}; +#endif /* _KERNEL */ + +/* + * User specifiable flags. + */ +#define MNT_RDONLY 0x00000001 /* read only filesystem */ +#define MNT_SYNCHRONOUS 0x00000002 /* file system written synchronously */ +#define MNT_NOEXEC 0x00000004 /* can't exec from filesystem */ +#define MNT_NOSUID 0x00000008 /* don't honor setuid bits on fs */ +#define MNT_NODEV 0x00000010 /* don't interpret special files */ +#define MNT_UNION 0x00000020 /* union with underlying filesystem */ +#define MNT_ASYNC 0x00000040 /* file system written asynchronously */ +#define MNT_SUIDDIR 0x00100000 /* special handling of SUID on dirs */ +#define MNT_SOFTDEP 0x00200000 /* soft updates being done */ +#define MNT_NOSYMFOLLOW 0x00400000 /* do not follow symlinks */ +#define MNT_NOATIME 0x10000000 /* disable update of file access time */ +#define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */ +#define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */ + +/* + * NFS export related mount flags. + */ +#define MNT_EXRDONLY 0x00000080 /* exported read only */ +#define MNT_EXPORTED 0x00000100 /* file system is exported */ +#define MNT_DEFEXPORTED 0x00000200 /* exported to the world */ +#define MNT_EXPORTANON 0x00000400 /* use anon uid mapping for everyone */ +#define MNT_EXKERB 0x00000800 /* exported with Kerberos uid mapping */ +#define MNT_EXPUBLIC 0x20000000 /* public export (WebNFS) */ + +/* + * Flags set by internal operations, + * but visible to the user. + * XXX some of these are not quite right.. (I've never seen the root flag set) + */ +#define MNT_LOCAL 0x00001000 /* filesystem is stored locally */ +#define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */ +#define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */ +#define MNT_USER 0x00008000 /* mounted by a user */ +#define MNT_IGNORE 0x00800000 /* do not show entry in df */ + +/* + * Mask of flags that are visible to statfs() + * XXX I think that this could now become (~(MNT_CMDFLAGS)) + * but the 'mount' program may need changing to handle this. + */ +#define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ + MNT_NOSUID | MNT_NODEV | MNT_UNION | \ + MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ + MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ + MNT_LOCAL | MNT_USER | MNT_QUOTA | \ + MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ + MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ + MNT_IGNORE | MNT_NOSYMFOLLOW | MNT_EXPUBLIC ) +/* + * External filesystem command modifier flags. + * Unmount can use the MNT_FORCE flag. + * XXX These are not STATES and really should be somewhere else. + */ +#define MNT_UPDATE 0x00010000 /* not a real mount, just an update */ +#define MNT_DELEXPORT 0x00020000 /* delete export host lists */ +#define MNT_RELOAD 0x00040000 /* reload filesystem data */ +#define MNT_FORCE 0x00080000 /* force unmount or readonly change */ +#define MNT_CMDFLAGS (MNT_UPDATE|MNT_DELEXPORT|MNT_RELOAD|MNT_FORCE) +/* + * Internal filesystem control flags stored in mnt_kern_flag. + * + * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed + * past the mount point. This keeps the subtree stable during mounts + * and unmounts. + * + * MNTK_UNMOUNTF permits filesystems to detect a forced unmount while + * dounmount() is still waiting to lock the mountpoint. This allows + * the filesystem to cancel operations that might otherwise deadlock + * with the unmount attempt (used by NFS). + */ +#define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */ +#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ +#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ +#define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ + +/* + * Sysctl CTL_VFS definitions. + * + * Second level identifier specifies which filesystem. Second level + * identifier VFS_VFSCONF returns information about all filesystems. + * Second level identifier VFS_GENERIC is non-terminal. + */ +#define VFS_VFSCONF 0 /* get configured filesystems */ +#define VFS_GENERIC 0 /* generic filesystem information */ +/* + * Third level identifiers for VFS_GENERIC are given below; third + * level identifiers for specific filesystems are given in their + * mount specific header files. + */ +#define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ +#define VFS_CONF 2 /* struct: vfsconf for filesystem given + as next argument */ + +/* + * Flags for various system call interfaces. + * + * waitfor flags to vfs_sync() and getfsstat() + */ +#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ +#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ +#define MNT_LAZY 3 /* push data not written by filesystem syncer */ + +/* + * Generic file handle + */ +struct fhandle { + fsid_t fh_fsid; /* File system id of mount point */ + struct fid fh_fid; /* File sys specific id */ +}; +typedef struct fhandle fhandle_t; + +/* + * Export arguments for local filesystem mount calls. + */ +struct export_args { + int ex_flags; /* export related flags */ + uid_t ex_root; /* mapping for root uid */ + struct ucred ex_anon; /* mapping for anonymous user */ + struct sockaddr *ex_addr; /* net address to which exported */ + int ex_addrlen; /* and the net address length */ + struct sockaddr *ex_mask; /* mask of valid bits in saddr */ + int ex_masklen; /* and the smask length */ + char *ex_indexfile; /* index file for WebNFS URLs */ +}; + +/* + * Structure holding information for a publicly exported filesystem + * (WebNFS). Currently the specs allow just for one such filesystem. + */ +struct nfs_public { + int np_valid; /* Do we hold valid information */ + fhandle_t np_handle; /* Filehandle for pub fs (internal) */ + struct mount *np_mount; /* Mountpoint of exported fs */ + char *np_index; /* Index file */ +}; + +/* + * Filesystem configuration information. One of these exists for each + * type of filesystem supported by the kernel. These are searched at + * mount time to identify the requested filesystem. + */ +struct vfsconf { + struct vfsops *vfc_vfsops; /* filesystem operations vector */ + char vfc_name[MFSNAMELEN]; /* filesystem type name */ + int vfc_typenum; /* historic filesystem type number */ + int vfc_refcount; /* number mounted of this type */ + int vfc_flags; /* permanent flags */ + struct vfsconf *vfc_next; /* next in list */ +}; + +struct ovfsconf { + void *vfc_vfsops; + char vfc_name[32]; + int vfc_index; + int vfc_refcount; + int vfc_flags; +}; + +/* + * NB: these flags refer to IMPLEMENTATION properties, not properties of + * any actual mounts; i.e., it does not make sense to change the flags. + */ +#define VFCF_STATIC 0x00010000 /* statically compiled into kernel */ +#define VFCF_NETWORK 0x00020000 /* may get data over the network */ +#define VFCF_READONLY 0x00040000 /* writes are not implemented */ +#define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ +#define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ +#define VFCF_UNICODE 0x00200000 /* stores file names as Unicode*/ + +#ifdef _KERNEL + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_MOUNT); +#endif +extern int maxvfsconf; /* highest defined filesystem type */ +extern int nfs_mount_type; /* vfc_typenum for nfs, or -1 */ +extern struct vfsconf *vfsconf; /* head of list of filesystem types */ + +/* + * Operations supported on mounted file system. + */ +#ifdef __STDC__ +struct nameidata; +struct mbuf; +#endif + +struct vfsops { + int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); + int (*vfs_start) __P((struct mount *mp, int flags, + struct proc *p)); + int (*vfs_unmount) __P((struct mount *mp, int mntflags, + struct proc *p)); + int (*vfs_root) __P((struct mount *mp, struct vnode **vpp)); + int (*vfs_quotactl) __P((struct mount *mp, int cmds, uid_t uid, + caddr_t arg, struct proc *p)); + int (*vfs_statfs) __P((struct mount *mp, struct statfs *sbp, + struct proc *p)); + int (*vfs_sync) __P((struct mount *mp, int waitfor, + struct ucred *cred, struct proc *p)); + int (*vfs_vget) __P((struct mount *mp, ino_t ino, + struct vnode **vpp)); + int (*vfs_fhtovp) __P((struct mount *mp, struct fid *fhp, + struct vnode **vpp)); + int (*vfs_checkexp) __P((struct mount *mp, struct sockaddr *nam, + int *extflagsp, struct ucred **credanonp)); + int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp)); + int (*vfs_init) __P((struct vfsconf *)); + int (*vfs_uninit) __P((struct vfsconf *)); + int (*vfs_extattrctl) __P((struct mount *mp, int cmd, + const char *attrname, caddr_t arg, + struct proc *p)); +}; + +#define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ + (*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P) +#define VFS_START(MP, FLAGS, P) (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P) +#define VFS_UNMOUNT(MP, FORCE, P) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE, P) +#define VFS_ROOT(MP, VPP) (*(MP)->mnt_op->vfs_root)(MP, VPP) +#define VFS_QUOTACTL(MP,C,U,A,P) (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P) +#define VFS_STATFS(MP, SBP, P) (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P) +#define VFS_SYNC(MP, WAIT, C, P) (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P) +#define VFS_VGET(MP, INO, VPP) (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP) +#define VFS_FHTOVP(MP, FIDP, VPP) \ + (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, VPP) +#define VFS_VPTOFH(VP, FIDP) (*(VP)->v_mount->mnt_op->vfs_vptofh)(VP, FIDP) +#define VFS_CHECKEXP(MP, NAM, EXFLG, CRED) \ + (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED) +#define VFS_EXTATTRCTL(MP, C, N, A, P) \ + (*(MP)->mnt_op->vfs_extattrctl)(MP, C, N, A, P) + +#include + +#define VFS_SET(vfsops, fsname, flags) \ + static struct vfsconf fsname ## _vfsconf = { \ + &vfsops, \ + #fsname, \ + -1, \ + 0, \ + flags \ + }; \ + static moduledata_t fsname ## _mod = { \ + #fsname, \ + vfs_modevent, \ + & fsname ## _vfsconf \ + }; \ + DECLARE_MODULE(fsname, fsname ## _mod, SI_SUB_VFS, SI_ORDER_MIDDLE) + +#include + +#define AF_MAX 33 /* XXX */ + +/* + * Network address lookup element + */ +struct netcred { + struct radix_node netc_rnodes[2]; + int netc_exflags; + struct ucred netc_anon; +}; + +/* + * Network export information + */ +struct netexport { + struct netcred ne_defexported; /* Default export */ + struct radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */ +}; + +extern char *mountrootfsname; + +/* + * exported vnode operations + */ +int dounmount __P((struct mount *, int, struct proc *)); +int vfs_setpublicfs /* set publicly exported fs */ + __P((struct mount *, struct netexport *, struct export_args *)); +int vfs_lock __P((struct mount *)); /* lock a vfs */ +void vfs_msync __P((struct mount *, int)); +void vfs_unlock __P((struct mount *)); /* unlock a vfs */ +int vfs_busy __P((struct mount *, int, struct simplelock *, struct proc *)); +int vfs_export /* process mount export info */ + __P((struct mount *, struct netexport *, struct export_args *)); +struct netcred *vfs_export_lookup /* lookup host in fs export list */ + __P((struct mount *, struct netexport *, struct sockaddr *)); +int vfs_allocate_syncvnode __P((struct mount *)); +void vfs_getnewfsid __P((struct mount *)); +dev_t vfs_getrootfsid __P((struct mount *)); +struct mount *vfs_getvfs __P((fsid_t *)); /* return vfs given fsid */ +int vfs_modevent __P((module_t, int, void *)); +int vfs_mountedon __P((struct vnode *)); /* is a vfs mounted on vp */ +int vfs_rootmountalloc __P((char *, char *, struct mount **)); +void vfs_unbusy __P((struct mount *, struct proc *)); +void vfs_unmountall __P((void)); +int vfs_register __P((struct vfsconf *)); +int vfs_unregister __P((struct vfsconf *)); +extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ +extern struct simplelock mountlist_slock; +extern struct nfs_public nfs_pub; + +/* + * Declarations for these vfs default operations are located in + * kern/vfs_default.c, they should be used instead of making "dummy" + * functions or casting entries in the VFS op table to "enopnotsupp()". + */ +int vfs_stdmount __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +int vfs_stdstart __P((struct mount *mp, int flags, struct proc *p)); +int vfs_stdunmount __P((struct mount *mp, int mntflags, struct proc *p)); +int vfs_stdroot __P((struct mount *mp, struct vnode **vpp)); +int vfs_stdquotactl __P((struct mount *mp, int cmds, uid_t uid, + caddr_t arg, struct proc *p)); +int vfs_stdstatfs __P((struct mount *mp, struct statfs *sbp, struct proc *p)); +int vfs_stdsync __P((struct mount *mp, int waitfor, struct ucred *cred, + struct proc *p)); +int vfs_stdvget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); +int vfs_stdfhtovp __P((struct mount *mp, struct fid *fhp, struct vnode **vpp)); +int vfs_stdcheckexp __P((struct mount *mp, struct sockaddr *nam, + int *extflagsp, struct ucred **credanonp)); +int vfs_stdvptofh __P((struct vnode *vp, struct fid *fhp)); +int vfs_stdinit __P((struct vfsconf *)); +int vfs_stduninit __P((struct vfsconf *)); +int vfs_stdextattrctl __P((struct mount *mp, int cmd, const char *attrname, + caddr_t arg, struct proc *p)); + +#else /* !_KERNEL */ + +#include + +__BEGIN_DECLS +int fstatfs __P((int, struct statfs *)); +int getfh __P((const char *, fhandle_t *)); +int getfsstat __P((struct statfs *, long, int)); +int getmntinfo __P((struct statfs **, int)); +int mount __P((const char *, const char *, int, void *)); +int statfs __P((const char *, struct statfs *)); +int unmount __P((const char *, int)); +int fhopen __P((const struct fhandle *, int)); +int fhstat __P((const struct fhandle *, struct stat *)); +int fhstatfs __P((const struct fhandle *, struct statfs *)); + +/* C library stuff */ +void endvfsent __P((void)); +struct ovfsconf *getvfsbyname __P((const char *)); +struct ovfsconf *getvfsbytype __P((int)); +struct ovfsconf *getvfsent __P((void)); +#define getvfsbyname new_getvfsbyname +int new_getvfsbyname __P((const char *, struct vfsconf *)); +void setvfsent __P((int)); +int vfsisloadable __P((const char *)); +int vfsload __P((const char *)); +__END_DECLS + +#endif /* _KERNEL */ + +#endif /* !_SYS_MOUNT_H_ */ diff --git a/os/shd/tty_tty.c b/os/shd/tty_tty.c new file mode 100644 index 0000000000000000000000000000000000000000..76e8adc3768969f5804538217d548726db08b8d7 --- /dev/null +++ b/os/shd/tty_tty.c @@ -0,0 +1,203 @@ +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tty_tty.c 8.2 (Berkeley) 9/23/93 + * $FreeBSD: src/sys/kern/tty_tty.c,v 1.30 1999/09/25 18:24:24 phk Exp $ + */ + +/* + * Indirect driver for controlling tty. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static d_open_t cttyopen; +static d_read_t cttyread; +static d_write_t cttywrite; +static d_ioctl_t cttyioctl; +static d_poll_t cttypoll; + +#define CDEV_MAJOR 1 +/* Don't make this static, since fdesc_vnops uses it. */ +struct cdevsw ctty_cdevsw = { + /* open */ cttyopen, + /* close */ nullclose, + /* read */ cttyread, + /* write */ cttywrite, + /* ioctl */ cttyioctl, + /* poll */ cttypoll, + /* mmap */ nommap, + /* strategy */ nostrategy, + /* name */ "ctty", + /* maj */ CDEV_MAJOR, + /* dump */ nodump, + /* psize */ nopsize, + /* flags */ D_TTY, + /* bmaj */ -1 +}; + +#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL) + +/*ARGSUSED*/ +static int +cttyopen(dev, flag, mode, p) + dev_t dev; + int flag, mode; + struct proc *p; +{ + struct vnode *ttyvp = cttyvp(p); + int error; + + if (ttyvp == NULL) + return (ENXIO); + shd_write_entry (ttyvp, "cttyopen"); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); +#ifdef PARANOID + /* + * Since group is tty and mode is 620 on most terminal lines + * and since sessions protect terminals from processes outside + * your session, this check is probably no longer necessary. + * Since it inhibits setuid root programs that later switch + * to another user from accessing /dev/tty, we have decided + * to delete this test. (mckusick 5/93) + */ + error = VOP_ACCESS(ttyvp, + (flag&FREAD ? VREAD : 0) | (flag&FWRITE ? VWRITE : 0), p->p_ucred, p); + if (!error) +#endif /* PARANOID */ + error = VOP_OPEN(ttyvp, flag, NOCRED, p); + VOP_UNLOCK(ttyvp, 0, p); + shd_write_exit (ttyvp, "cttyopen"); + return (error); +} + +/*ARGSUSED*/ +static int +cttyread(dev, uio, flag) + dev_t dev; + struct uio *uio; + int flag; +{ + struct proc *p = uio->uio_procp; + register struct vnode *ttyvp = cttyvp(p); + int error; + + if (ttyvp == NULL) + return (EIO); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_READ(ttyvp, uio, flag, NOCRED); + VOP_UNLOCK(ttyvp, 0, p); + return (error); +} + +extern void shd_write_entry(struct vnode *vp, char *name); +extern void shd_write_exit(struct vnode *vp, char *name); + +/*ARGSUSED*/ +static int +cttywrite(dev, uio, flag) + dev_t dev; + struct uio *uio; + int flag; +{ + struct proc *p = uio->uio_procp; + struct vnode *ttyvp = cttyvp(uio->uio_procp); + int error; + + if (ttyvp == NULL) + return (EIO); + + shd_write_entry (ttyvp, "cttywrite"); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_WRITE(ttyvp, uio, flag, NOCRED); + VOP_UNLOCK(ttyvp, 0, p); + shd_write_exit (ttyvp, "cttywrite"); + return (error); +} + +/*ARGSUSED*/ +static int +cttyioctl(dev, cmd, addr, flag, p) + dev_t dev; + u_long cmd; + caddr_t addr; + int flag; + struct proc *p; +{ + struct vnode *ttyvp = cttyvp(p); + + if (ttyvp == NULL) + return (EIO); + if (cmd == TIOCSCTTY) /* don't allow controlling tty to be set */ + return EINVAL; /* to controlling tty -- infinite recursion */ + if (cmd == TIOCNOTTY) { + if (!SESS_LEADER(p)) { + p->p_flag &= ~P_CONTROLT; + return (0); + } else + return (EINVAL); + } + return (VOP_IOCTL(ttyvp, cmd, addr, flag, NOCRED, p)); +} + +/*ARGSUSED*/ +static int +cttypoll(dev, events, p) + dev_t dev; + int events; + struct proc *p; +{ + struct vnode *ttyvp = cttyvp(p); + + if (ttyvp == NULL) + /* try operation to get EOF/failure */ + return (seltrue(dev, events, p)); + return (VOP_POLL(ttyvp, events, p->p_ucred, p)); +} + +static void ctty_drvinit __P((void *unused)); +static void +ctty_drvinit(unused) + void *unused; +{ + + make_dev(&ctty_cdevsw, 0, 0, 0, 0666, "tty"); +} + +SYSINIT(cttydev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,ctty_drvinit,NULL) diff --git a/os/shd/uipc_usrreq.c b/os/shd/uipc_usrreq.c new file mode 100644 index 0000000000000000000000000000000000000000..900c969230dc89812fce7fd3ebabbd4027a65e2c --- /dev/null +++ b/os/shd/uipc_usrreq.c @@ -0,0 +1,1354 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 + * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.7 2002/03/09 05:22:23 dd Exp $ + */ + +#include +#include +#include +#include +#include +#include /* XXX must be before */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static struct vm_zone *unp_zone; +static unp_gen_t unp_gencnt; +static u_int unp_count; + +static struct unp_head unp_shead, unp_dhead; + +/* + * Unix communications domain. + * + * TODO: + * SEQPACKET, RDM + * rethink name space problems + * need a proper out-of-band + * lock pushdown + */ +static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; +static ino_t unp_ino; /* prototype for fake inode numbers */ + +static int unp_attach __P((struct socket *)); +static void unp_detach __P((struct unpcb *)); +static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *)); +static int unp_connect __P((struct socket *,struct sockaddr *, + struct proc *)); +static void unp_disconnect __P((struct unpcb *)); +static void unp_shutdown __P((struct unpcb *)); +static void unp_drop __P((struct unpcb *, int)); +static void unp_gc __P((void)); +static void unp_scan __P((struct mbuf *, void (*)(struct file *))); +static void unp_mark __P((struct file *)); +static void unp_discard __P((struct file *)); +static int unp_internalize __P((struct mbuf *, struct proc *)); +static int unp_listen __P((struct unpcb *, struct proc *)); + +static int +uipc_abort(struct socket *so) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + unp_drop(unp, ECONNABORTED); + unp_detach(unp); + sofree(so); + return 0; +} + +static int +uipc_accept(struct socket *so, struct sockaddr **nam) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + + /* + * Pass back name of connected socket, + * if it was bound and we are still connected + * (our peer may have closed already!). + */ + if (unp->unp_conn && unp->unp_conn->unp_addr) { + *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, + 1); + } else { + *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); + } + return 0; +} + +static int +uipc_attach(struct socket *so, int proto, struct proc *p) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp != 0) + return EISCONN; + return unp_attach(so); +} + +static int +uipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + + return unp_bind(unp, nam, p); +} + +static int +uipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + return unp_connect(so, nam, curproc); +} + +static int +uipc_connect2(struct socket *so1, struct socket *so2) +{ + struct unpcb *unp = sotounpcb(so1); + + if (unp == 0) + return EINVAL; + + return unp_connect2(so1, so2); +} + +/* control is EOPNOTSUPP */ + +static int +uipc_detach(struct socket *so) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + + unp_detach(unp); + return 0; +} + +static int +uipc_disconnect(struct socket *so) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + unp_disconnect(unp); + return 0; +} + +static int +uipc_listen(struct socket *so, struct proc *p) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0 || unp->unp_vnode == 0) + return EINVAL; + return unp_listen(unp, p); +} + +static int +uipc_peeraddr(struct socket *so, struct sockaddr **nam) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + if (unp->unp_conn && unp->unp_conn->unp_addr) + *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, + 1); + return 0; +} + +static int +uipc_rcvd(struct socket *so, int flags) +{ + struct unpcb *unp = sotounpcb(so); + struct socket *so2; + u_long newhiwat; + + if (unp == 0) + return EINVAL; + switch (so->so_type) { + case SOCK_DGRAM: + panic("uipc_rcvd DGRAM?"); + /*NOTREACHED*/ + + case SOCK_STREAM: + if (unp->unp_conn == 0) + break; + so2 = unp->unp_conn->unp_socket; + /* + * Adjust backpressure on sender + * and wakeup any waiting to write. + */ + so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; + unp->unp_mbcnt = so->so_rcv.sb_mbcnt; + newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - + so->so_rcv.sb_cc; + (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, + newhiwat, RLIM_INFINITY); + unp->unp_cc = so->so_rcv.sb_cc; + sowwakeup(so2); + break; + + default: + panic("uipc_rcvd unknown socktype"); + } + return 0; +} + +/* pru_rcvoob is EOPNOTSUPP */ + +static int +uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, + struct mbuf *control, struct proc *p) +{ + int error = 0; + struct unpcb *unp = sotounpcb(so); + struct socket *so2; + u_long newhiwat; + + if (unp == 0) { + error = EINVAL; + goto release; + } + if (flags & PRUS_OOB) { + error = EOPNOTSUPP; + goto release; + } + + if (control && (error = unp_internalize(control, p))) + goto release; + + switch (so->so_type) { + case SOCK_DGRAM: + { + struct sockaddr *from; + + if (nam) { + if (unp->unp_conn) { + error = EISCONN; + break; + } + error = unp_connect(so, nam, p); + if (error) + break; + } else { + if (unp->unp_conn == 0) { + error = ENOTCONN; + break; + } + } + so2 = unp->unp_conn->unp_socket; + if (unp->unp_addr) + from = (struct sockaddr *)unp->unp_addr; + else + from = &sun_noname; + if (sbappendaddr(&so2->so_rcv, from, m, control)) { + sorwakeup(so2); + m = 0; + control = 0; + } else + error = ENOBUFS; + if (nam) + unp_disconnect(unp); + break; + } + + case SOCK_STREAM: + /* Connect if not connected yet. */ + /* + * Note: A better implementation would complain + * if not equal to the peer's address. + */ + if ((so->so_state & SS_ISCONNECTED) == 0) { + if (nam) { + error = unp_connect(so, nam, p); + if (error) + break; /* XXX */ + } else { + error = ENOTCONN; + break; + } + } + + if (so->so_state & SS_CANTSENDMORE) { + error = EPIPE; + break; + } + if (unp->unp_conn == 0) + panic("uipc_send connected but no connection?"); + so2 = unp->unp_conn->unp_socket; + /* + * Send to paired receive port, and then reduce + * send buffer hiwater marks to maintain backpressure. + * Wake up readers. + */ + if (control) { + if (sbappendcontrol(&so2->so_rcv, m, control)) + control = 0; + } else + sbappend(&so2->so_rcv, m); + so->so_snd.sb_mbmax -= + so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; + unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; + newhiwat = so->so_snd.sb_hiwat - + (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc); + (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, + newhiwat, RLIM_INFINITY); + unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; + sorwakeup(so2); + m = 0; + break; + + default: + panic("uipc_send unknown socktype"); + } + + /* + * SEND_EOF is equivalent to a SEND followed by + * a SHUTDOWN. + */ + if (flags & PRUS_EOF) { + socantsendmore(so); + unp_shutdown(unp); + } + + if (control && error != 0) + unp_dispose(control); + +release: + if (control) + m_freem(control); + if (m) + m_freem(m); + return error; +} + +static int +uipc_sense(struct socket *so, struct stat *sb) +{ + struct unpcb *unp = sotounpcb(so); + struct socket *so2; + + if (unp == 0) + return EINVAL; + sb->st_blksize = so->so_snd.sb_hiwat; + if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { + so2 = unp->unp_conn->unp_socket; + sb->st_blksize += so2->so_rcv.sb_cc; + } + sb->st_dev = NOUDEV; + if (unp->unp_ino == 0) + unp->unp_ino = unp_ino++; + sb->st_ino = unp->unp_ino; + return (0); +} + +static int +uipc_shutdown(struct socket *so) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + socantsendmore(so); + unp_shutdown(unp); + return 0; +} + +static int +uipc_sockaddr(struct socket *so, struct sockaddr **nam) +{ + struct unpcb *unp = sotounpcb(so); + + if (unp == 0) + return EINVAL; + if (unp->unp_addr) + *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1); + return 0; +} + +struct pr_usrreqs uipc_usrreqs = { + uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, + uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, + uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, + uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, + sosend, soreceive, sopoll +}; + +int +uipc_ctloutput(so, sopt) + struct socket *so; + struct sockopt *sopt; +{ + struct unpcb *unp = sotounpcb(so); + int error; + + switch (sopt->sopt_dir) { + case SOPT_GET: + switch (sopt->sopt_name) { + case LOCAL_PEERCRED: + if (unp->unp_flags & UNP_HAVEPC) + error = sooptcopyout(sopt, &unp->unp_peercred, + sizeof(unp->unp_peercred)); + else { + if (so->so_type == SOCK_STREAM) + error = ENOTCONN; + else + error = EINVAL; + } + break; + default: + error = EOPNOTSUPP; + break; + } + break; + case SOPT_SET: + default: + error = EOPNOTSUPP; + break; + } + return (error); +} + +/* + * Both send and receive buffers are allocated PIPSIZ bytes of buffering + * for stream sockets, although the total for sender and receiver is + * actually only PIPSIZ. + * Datagram sockets really use the sendspace as the maximum datagram size, + * and don't really want to reserve the sendspace. Their recvspace should + * be large enough for at least one max-size datagram plus address. + */ +#ifndef PIPSIZ +#define PIPSIZ 8192 +#endif +static u_long unpst_sendspace = PIPSIZ; +static u_long unpst_recvspace = PIPSIZ; +static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ +static u_long unpdg_recvspace = 4*1024; + +static int unp_rights; /* file descriptors in flight */ + +SYSCTL_DECL(_net_local_stream); +SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, + &unpst_sendspace, 0, ""); +SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, + &unpst_recvspace, 0, ""); +SYSCTL_DECL(_net_local_dgram); +SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, + &unpdg_sendspace, 0, ""); +SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, + &unpdg_recvspace, 0, ""); +SYSCTL_DECL(_net_local); +SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); + +static int +unp_attach(so) + struct socket *so; +{ + register struct unpcb *unp; + int error; + + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + switch (so->so_type) { + + case SOCK_STREAM: + error = soreserve(so, unpst_sendspace, unpst_recvspace); + break; + + case SOCK_DGRAM: + error = soreserve(so, unpdg_sendspace, unpdg_recvspace); + break; + + default: + panic("unp_attach"); + } + if (error) + return (error); + } + unp = zalloc(unp_zone); + if (unp == NULL) + return (ENOBUFS); + bzero(unp, sizeof *unp); + unp->unp_gencnt = ++unp_gencnt; + unp_count++; + LIST_INIT(&unp->unp_refs); + unp->unp_socket = so; + unp->unp_rvnode = curproc->p_fd->fd_rdir; + LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead + : &unp_shead, unp, unp_link); + so->so_pcb = (caddr_t)unp; + return (0); +} + +static void +unp_detach(unp) + register struct unpcb *unp; +{ + LIST_REMOVE(unp, unp_link); + unp->unp_gencnt = ++unp_gencnt; + --unp_count; + if (unp->unp_vnode) { + unp->unp_vnode->v_socket = 0; + vrele(unp->unp_vnode); + unp->unp_vnode = 0; + } + if (unp->unp_conn) + unp_disconnect(unp); + while (!LIST_EMPTY(&unp->unp_refs)) + unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); + soisdisconnected(unp->unp_socket); + unp->unp_socket->so_pcb = 0; + if (unp_rights) { + /* + * Normally the receive buffer is flushed later, + * in sofree, but if our receive buffer holds references + * to descriptors that are now garbage, we will dispose + * of those descriptor references after the garbage collector + * gets them (resulting in a "panic: closef: count < 0"). + */ + sorflush(unp->unp_socket); + unp_gc(); + } + if (unp->unp_addr) + FREE(unp->unp_addr, M_SONAME); + zfree(unp_zone, unp); +} + +extern void shd_write_entry(struct vnode *vp, char *name); +extern void shd_write_exit(struct vnode *vp, char *name); + +static int +unp_bind(unp, nam, p) + struct unpcb *unp; + struct sockaddr *nam; + struct proc *p; +{ + struct sockaddr_un *soun = (struct sockaddr_un *)nam; + register struct vnode *vp; + struct vattr vattr; + int error, namelen; + struct nameidata nd; + char buf[SOCK_MAXADDRLEN]; + + if (unp->unp_vnode != NULL) + return (EINVAL); + namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); + if (namelen <= 0) + return EINVAL; + strncpy(buf, soun->sun_path, namelen); + buf[namelen] = 0; /* null-terminate the string */ + NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, + buf, p); +/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ + error = namei(&nd); + if (error) + return (error); + vp = nd.ni_vp; + if (vp != NULL) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(vp); + return (EADDRINUSE); + } + VATTR_NULL(&vattr); + vattr.va_type = VSOCK; + vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + shd_write_entry (nd.ni_dvp, "unp_bind"); + error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + shd_write_exit (nd.ni_dvp, "unp_bind"); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if (error) + return (error); + vp = nd.ni_vp; + vp->v_socket = unp->unp_socket; + unp->unp_vnode = vp; + unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); + VOP_UNLOCK(vp, 0, p); + return (0); +} + +static int +unp_connect(so, nam, p) + struct socket *so; + struct sockaddr *nam; + struct proc *p; +{ + register struct sockaddr_un *soun = (struct sockaddr_un *)nam; + register struct vnode *vp; + register struct socket *so2, *so3; + struct unpcb *unp, *unp2, *unp3; + int error, len; + struct nameidata nd; + char buf[SOCK_MAXADDRLEN]; + + len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); + if (len <= 0) + return EINVAL; + strncpy(buf, soun->sun_path, len); + buf[len] = 0; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p); + error = namei(&nd); + if (error) + return (error); + vp = nd.ni_vp; + NDFREE(&nd, NDF_ONLY_PNBUF); + if (vp->v_type != VSOCK) { + error = ENOTSOCK; + goto bad; + } + shd_write_entry (nd.ni_dvp, "unp_connect"); + error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p); + shd_write_exit (nd.ni_dvp, "unp_connect"); + if (error) + goto bad; + so2 = vp->v_socket; + if (so2 == 0) { + error = ECONNREFUSED; + goto bad; + } + if (so->so_type != so2->so_type) { + error = EPROTOTYPE; + goto bad; + } + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + if ((so2->so_options & SO_ACCEPTCONN) == 0 || + (so3 = sonewconn3(so2, 0, p)) == 0) { + error = ECONNREFUSED; + goto bad; + } + unp = sotounpcb(so); + unp2 = sotounpcb(so2); + unp3 = sotounpcb(so3); + if (unp2->unp_addr) + unp3->unp_addr = (struct sockaddr_un *) + dup_sockaddr((struct sockaddr *) + unp2->unp_addr, 1); + + /* + * unp_peercred management: + * + * The connecter's (client's) credentials are copied + * from its process structure at the time of connect() + * (which is now). + */ + cru2x(p->p_ucred, &unp3->unp_peercred); + unp3->unp_flags |= UNP_HAVEPC; + /* + * The receiver's (server's) credentials are copied + * from the unp_peercred member of socket on which the + * former called listen(); unp_listen() cached that + * process's credentials at that time so we can use + * them now. + */ + KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, + ("unp_connect: listener without cached peercred")); + memcpy(&unp->unp_peercred, &unp2->unp_peercred, + sizeof(unp->unp_peercred)); + unp->unp_flags |= UNP_HAVEPC; + + so2 = so3; + } + error = unp_connect2(so, so2); +bad: + vput(vp); + return (error); +} + +int +unp_connect2(so, so2) + register struct socket *so; + register struct socket *so2; +{ + register struct unpcb *unp = sotounpcb(so); + register struct unpcb *unp2; + + if (so2->so_type != so->so_type) + return (EPROTOTYPE); + unp2 = sotounpcb(so2); + unp->unp_conn = unp2; + switch (so->so_type) { + + case SOCK_DGRAM: + LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); + soisconnected(so); + break; + + case SOCK_STREAM: + unp2->unp_conn = unp; + soisconnected(so); + soisconnected(so2); + break; + + default: + panic("unp_connect2"); + } + return (0); +} + +static void +unp_disconnect(unp) + struct unpcb *unp; +{ + register struct unpcb *unp2 = unp->unp_conn; + + if (unp2 == 0) + return; + unp->unp_conn = 0; + switch (unp->unp_socket->so_type) { + + case SOCK_DGRAM: + LIST_REMOVE(unp, unp_reflink); + unp->unp_socket->so_state &= ~SS_ISCONNECTED; + break; + + case SOCK_STREAM: + soisdisconnected(unp->unp_socket); + unp2->unp_conn = 0; + soisdisconnected(unp2->unp_socket); + break; + } +} + +#ifdef notdef +void +unp_abort(unp) + struct unpcb *unp; +{ + + unp_detach(unp); +} +#endif + +static int +prison_unpcb(struct proc *p, struct unpcb *unp) +{ + if (!p->p_prison) + return (0); + if (p->p_fd->fd_rdir == unp->unp_rvnode) + return (0); + return (1); +} + +static int +unp_pcblist(SYSCTL_HANDLER_ARGS) +{ + int error, i, n; + struct unpcb *unp, **unp_list; + unp_gen_t gencnt; + struct xunpgen xug; + struct unp_head *head; + + head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); + + /* + * The process of preparing the PCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + if (req->oldptr == 0) { + n = unp_count; + req->oldidx = 2 * (sizeof xug) + + (n + n/8) * sizeof(struct xunpcb); + return 0; + } + + if (req->newptr != 0) + return EPERM; + + /* + * OK, now we're committed to doing something. + */ + gencnt = unp_gencnt; + n = unp_count; + + xug.xug_len = sizeof xug; + xug.xug_count = n; + xug.xug_gen = gencnt; + xug.xug_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xug, sizeof xug); + if (error) + return error; + + unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); + if (unp_list == 0) + return ENOMEM; + + for (unp = LIST_FIRST(head), i = 0; unp && i < n; + unp = LIST_NEXT(unp, unp_link)) { + if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->p, unp)) + unp_list[i++] = unp; + } + n = i; /* in case we lost some during malloc */ + + error = 0; + for (i = 0; i < n; i++) { + unp = unp_list[i]; + if (unp->unp_gencnt <= gencnt) { + struct xunpcb xu; + xu.xu_len = sizeof xu; + xu.xu_unpp = unp; + /* + * XXX - need more locking here to protect against + * connect/disconnect races for SMP. + */ + if (unp->unp_addr) + bcopy(unp->unp_addr, &xu.xu_addr, + unp->unp_addr->sun_len); + if (unp->unp_conn && unp->unp_conn->unp_addr) + bcopy(unp->unp_conn->unp_addr, + &xu.xu_caddr, + unp->unp_conn->unp_addr->sun_len); + bcopy(unp, &xu.xu_unp, sizeof *unp); + sotoxsocket(unp->unp_socket, &xu.xu_socket); + error = SYSCTL_OUT(req, &xu, sizeof xu); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. + */ + xug.xug_gen = unp_gencnt; + xug.xug_sogen = so_gencnt; + xug.xug_count = unp_count; + error = SYSCTL_OUT(req, &xug, sizeof xug); + } + free(unp_list, M_TEMP); + return error; +} + +SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, + (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", + "List of active local datagram sockets"); +SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, + (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", + "List of active local stream sockets"); + +static void +unp_shutdown(unp) + struct unpcb *unp; +{ + struct socket *so; + + if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && + (so = unp->unp_conn->unp_socket)) + socantrcvmore(so); +} + +static void +unp_drop(unp, errno) + struct unpcb *unp; + int errno; +{ + struct socket *so = unp->unp_socket; + + so->so_error = errno; + unp_disconnect(unp); +} + +#ifdef notdef +void +unp_drain() +{ + +} +#endif + +int +unp_externalize(rights) + struct mbuf *rights; +{ + struct proc *p = curproc; /* XXX */ + register int i; + register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); + register int *fdp; + register struct file **rp; + register struct file *fp; + int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm)) + / sizeof (struct file *); + int f; + + /* + * if the new FD's will not fit, then we free them all + */ + if (!fdavail(p, newfds)) { + rp = (struct file **)CMSG_DATA(cm); + for (i = 0; i < newfds; i++) { + fp = *rp; + /* + * zero the pointer before calling unp_discard, + * since it may end up in unp_gc().. + */ + *rp++ = 0; + unp_discard(fp); + } + return (EMSGSIZE); + } + /* + * now change each pointer to an fd in the global table to + * an integer that is the index to the local fd table entry + * that we set up to point to the global one we are transferring. + * If sizeof (struct file *) is bigger than or equal to sizeof int, + * then do it in forward order. In that case, an integer will + * always come in the same place or before its corresponding + * struct file pointer. + * If sizeof (struct file *) is smaller than sizeof int, then + * do it in reverse order. + */ + if (sizeof (struct file *) >= sizeof (int)) { + fdp = (int *)(cm + 1); + rp = (struct file **)CMSG_DATA(cm); + for (i = 0; i < newfds; i++) { + if (fdalloc(p, 0, &f)) + panic("unp_externalize"); + fp = *rp++; + p->p_fd->fd_ofiles[f] = fp; + fp->f_msgcount--; + unp_rights--; + *fdp++ = f; + } + } else { + fdp = (int *)(cm + 1) + newfds - 1; + rp = (struct file **)CMSG_DATA(cm) + newfds - 1; + for (i = 0; i < newfds; i++) { + if (fdalloc(p, 0, &f)) + panic("unp_externalize"); + fp = *rp--; + p->p_fd->fd_ofiles[f] = fp; + fp->f_msgcount--; + unp_rights--; + *fdp-- = f; + } + } + + /* + * Adjust length, in case sizeof(struct file *) and sizeof(int) + * differs. + */ + cm->cmsg_len = CMSG_LEN(newfds * sizeof(int)); + rights->m_len = cm->cmsg_len; + return (0); +} + +void +unp_init(void) +{ + unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0); + if (unp_zone == 0) + panic("unp_init"); + LIST_INIT(&unp_dhead); + LIST_INIT(&unp_shead); +} + +#ifndef MIN +#define MIN(a,b) (((a)<(b))?(a):(b)) +#endif + +static int +unp_internalize(control, p) + struct mbuf *control; + struct proc *p; +{ + struct filedesc *fdescp = p->p_fd; + register struct cmsghdr *cm = mtod(control, struct cmsghdr *); + register struct file **rp; + register struct file *fp; + register int i, fd, *fdp; + register struct cmsgcred *cmcred; + int oldfds; + u_int newlen; + + if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || + cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len) + return (EINVAL); + + /* + * Fill in credential information. + */ + if (cm->cmsg_type == SCM_CREDS) { + cmcred = (struct cmsgcred *)(cm + 1); + cmcred->cmcred_pid = p->p_pid; + cmcred->cmcred_uid = p->p_cred->p_ruid; + cmcred->cmcred_gid = p->p_cred->p_rgid; + cmcred->cmcred_euid = p->p_ucred->cr_uid; + cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, + CMGROUP_MAX); + for (i = 0; i < cmcred->cmcred_ngroups; i++) + cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; + return(0); + } + + oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); + /* + * check that all the FDs passed in refer to legal OPEN files + * If not, reject the entire operation. + */ + fdp = (int *)(cm + 1); + for (i = 0; i < oldfds; i++) { + fd = *fdp++; + if ((unsigned)fd >= fdescp->fd_nfiles || + fdescp->fd_ofiles[fd] == NULL) + return (EBADF); + } + /* + * Now replace the integer FDs with pointers to + * the associated global file table entry.. + * Allocate a bigger buffer as necessary. But if an cluster is not + * enough, return E2BIG. + */ + newlen = CMSG_LEN(oldfds * sizeof(struct file *)); + if (newlen > MCLBYTES) + return (E2BIG); + if (newlen - control->m_len > M_TRAILINGSPACE(control)) { + if (control->m_flags & M_EXT) + return (E2BIG); + MCLGET(control, M_WAIT); + if ((control->m_flags & M_EXT) == 0) + return (ENOBUFS); + + /* copy the data to the cluster */ + memcpy(mtod(control, char *), cm, cm->cmsg_len); + cm = mtod(control, struct cmsghdr *); + } + + /* + * Adjust length, in case sizeof(struct file *) and sizeof(int) + * differs. + */ + control->m_len = cm->cmsg_len = newlen; + + /* + * Transform the file descriptors into struct file pointers. + * If sizeof (struct file *) is bigger than or equal to sizeof int, + * then do it in reverse order so that the int won't get until + * we're done. + * If sizeof (struct file *) is smaller than sizeof int, then + * do it in forward order. + */ + if (sizeof (struct file *) >= sizeof (int)) { + fdp = (int *)(cm + 1) + oldfds - 1; + rp = (struct file **)CMSG_DATA(cm) + oldfds - 1; + for (i = 0; i < oldfds; i++) { + fp = fdescp->fd_ofiles[*fdp--]; + *rp-- = fp; + fp->f_count++; + fp->f_msgcount++; + unp_rights++; + } + } else { + fdp = (int *)(cm + 1); + rp = (struct file **)CMSG_DATA(cm); + for (i = 0; i < oldfds; i++) { + fp = fdescp->fd_ofiles[*fdp++]; + *rp++ = fp; + fp->f_count++; + fp->f_msgcount++; + unp_rights++; + } + } + return (0); +} + +static int unp_defer, unp_gcing; + +static void +unp_gc() +{ + register struct file *fp, *nextfp; + register struct socket *so; + struct file **extra_ref, **fpp; + int nunref, i; + + if (unp_gcing) + return; + unp_gcing = 1; + unp_defer = 0; + /* + * before going through all this, set all FDs to + * be NOT defered and NOT externally accessible + */ + LIST_FOREACH(fp, &filehead, f_list) + fp->f_flag &= ~(FMARK|FDEFER); + do { + LIST_FOREACH(fp, &filehead, f_list) { + /* + * If the file is not open, skip it + */ + if (fp->f_count == 0) + continue; + /* + * If we already marked it as 'defer' in a + * previous pass, then try process it this time + * and un-mark it + */ + if (fp->f_flag & FDEFER) { + fp->f_flag &= ~FDEFER; + unp_defer--; + } else { + /* + * if it's not defered, then check if it's + * already marked.. if so skip it + */ + if (fp->f_flag & FMARK) + continue; + /* + * If all references are from messages + * in transit, then skip it. it's not + * externally accessible. + */ + if (fp->f_count == fp->f_msgcount) + continue; + /* + * If it got this far then it must be + * externally accessible. + */ + fp->f_flag |= FMARK; + } + /* + * either it was defered, or it is externally + * accessible and not already marked so. + * Now check if it is possibly one of OUR sockets. + */ + if (fp->f_type != DTYPE_SOCKET || + (so = (struct socket *)fp->f_data) == 0) + continue; + if (so->so_proto->pr_domain != &localdomain || + (so->so_proto->pr_flags&PR_RIGHTS) == 0) + continue; +#ifdef notdef + if (so->so_rcv.sb_flags & SB_LOCK) { + /* + * This is problematical; it's not clear + * we need to wait for the sockbuf to be + * unlocked (on a uniprocessor, at least), + * and it's also not clear what to do + * if sbwait returns an error due to receipt + * of a signal. If sbwait does return + * an error, we'll go into an infinite + * loop. Delete all of this for now. + */ + (void) sbwait(&so->so_rcv); + goto restart; + } +#endif + /* + * So, Ok, it's one of our sockets and it IS externally + * accessible (or was defered). Now we look + * to see if we hold any file descriptors in its + * message buffers. Follow those links and mark them + * as accessible too. + */ + unp_scan(so->so_rcv.sb_mb, unp_mark); + } + } while (unp_defer); + /* + * We grab an extra reference to each of the file table entries + * that are not otherwise accessible and then free the rights + * that are stored in messages on them. + * + * The bug in the orginal code is a little tricky, so I'll describe + * what's wrong with it here. + * + * It is incorrect to simply unp_discard each entry for f_msgcount + * times -- consider the case of sockets A and B that contain + * references to each other. On a last close of some other socket, + * we trigger a gc since the number of outstanding rights (unp_rights) + * is non-zero. If during the sweep phase the gc code un_discards, + * we end up doing a (full) closef on the descriptor. A closef on A + * results in the following chain. Closef calls soo_close, which + * calls soclose. Soclose calls first (through the switch + * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply + * returns because the previous instance had set unp_gcing, and + * we return all the way back to soclose, which marks the socket + * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush + * to free up the rights that are queued in messages on the socket A, + * i.e., the reference on B. The sorflush calls via the dom_dispose + * switch unp_dispose, which unp_scans with unp_discard. This second + * instance of unp_discard just calls closef on B. + * + * Well, a similar chain occurs on B, resulting in a sorflush on B, + * which results in another closef on A. Unfortunately, A is already + * being closed, and the descriptor has already been marked with + * SS_NOFDREF, and soclose panics at this point. + * + * Here, we first take an extra reference to each inaccessible + * descriptor. Then, we call sorflush ourself, since we know + * it is a Unix domain socket anyhow. After we destroy all the + * rights carried in messages, we do a last closef to get rid + * of our extra reference. This is the last close, and the + * unp_detach etc will shut down the socket. + * + * 91/09/19, bsy@cs.cmu.edu + */ + extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); + for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; + fp = nextfp) { + nextfp = LIST_NEXT(fp, f_list); + /* + * If it's not open, skip it + */ + if (fp->f_count == 0) + continue; + /* + * If all refs are from msgs, and it's not marked accessible + * then it must be referenced from some unreachable cycle + * of (shut-down) FDs, so include it in our + * list of FDs to remove + */ + if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { + *fpp++ = fp; + nunref++; + fp->f_count++; + } + } + /* + * for each FD on our hit list, do the following two things + */ + for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { + struct file *tfp = *fpp; + if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) + sorflush((struct socket *)(tfp->f_data)); + } + for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) + closef(*fpp, (struct proc *) NULL); + free((caddr_t)extra_ref, M_FILE); + unp_gcing = 0; +} + +void +unp_dispose(m) + struct mbuf *m; +{ + + if (m) + unp_scan(m, unp_discard); +} + +static int +unp_listen(unp, p) + struct unpcb *unp; + struct proc *p; +{ + + cru2x(p->p_ucred, &unp->unp_peercred); + unp->unp_flags |= UNP_HAVEPCCACHED; + return (0); +} + +static void +unp_scan(m0, op) + register struct mbuf *m0; + void (*op) __P((struct file *)); +{ + register struct mbuf *m; + register struct file **rp; + register struct cmsghdr *cm; + register int i; + int qfds; + + while (m0) { + for (m = m0; m; m = m->m_next) + if (m->m_type == MT_CONTROL && + m->m_len >= sizeof(*cm)) { + cm = mtod(m, struct cmsghdr *); + if (cm->cmsg_level != SOL_SOCKET || + cm->cmsg_type != SCM_RIGHTS) + continue; + qfds = (cm->cmsg_len - + (CMSG_DATA(cm) - (u_char *)cm)) + / sizeof (struct file *); + rp = (struct file **)CMSG_DATA(cm); + for (i = 0; i < qfds; i++) + (*op)(*rp++); + break; /* XXX, but saves time */ + } + m0 = m0->m_act; + } +} + +static void +unp_mark(fp) + struct file *fp; +{ + + if (fp->f_flag & FMARK) + return; + unp_defer++; + fp->f_flag |= (FMARK|FDEFER); +} + +static void +unp_discard(fp) + struct file *fp; +{ + + fp->f_msgcount--; + unp_rights--; + (void) closef(fp, (struct proc *)NULL); +} diff --git a/os/shd/vfs_syscalls.c b/os/shd/vfs_syscalls.c new file mode 100644 index 0000000000000000000000000000000000000000..3e56c0f27cd51df90e40b72ef57d349cae6d610a --- /dev/null +++ b/os/shd/vfs_syscalls.c @@ -0,0 +1,3773 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 + * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.16 2002/04/26 00:46:04 iedowse Exp $ + */ + +/* For 4.3 integer FS ID compatibility */ +#include "opt_compat.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static int change_dir __P((struct nameidata *ndp, struct proc *p)); +static void checkdirs __P((struct vnode *olddp)); +static int chroot_refuse_vdir_fds __P((struct filedesc *fdp)); +static int getutimes __P((const struct timeval *, struct timespec *)); +static int setfown __P((struct proc *, struct vnode *, uid_t, gid_t)); +static int setfmode __P((struct proc *, struct vnode *, int)); +static int setfflags __P((struct proc *, struct vnode *, int)); +static int setutimes __P((struct proc *, struct vnode *, + const struct timespec *, int)); +static int usermount = 0; /* if 1, non-root can mount fs. */ + +int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *)); + +SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); + +/* + * Virtual File System System Calls + */ + +extern void shd_write_entry(struct vnode *vp, char *name); +extern void shd_write_exit(struct vnode *vp, char *name); + +/* + * Mount a file system. + */ +#ifndef _SYS_SYSPROTO_H_ +struct mount_args { + char *type; + char *path; + int flags; + caddr_t data; +}; +#endif +/* ARGSUSED */ +int +mount(p, uap) + struct proc *p; + register struct mount_args /* { + syscallarg(char *) type; + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(caddr_t) data; + } */ *uap; +{ + struct vnode *vp; + struct mount *mp; + struct vfsconf *vfsp; + int error, flag = 0, flag2 = 0; + struct vattr va; +#ifdef COMPAT_43 + u_long fstypenum; +#endif + struct nameidata nd; + char fstypename[MFSNAMELEN]; + + if (usermount == 0 && (error = suser(p))) + return (error); + /* + * Do not allow NFS export by non-root users. + */ + if (SCARG(uap, flags) & MNT_EXPORTED) { + error = suser(p); + if (error) + return (error); + } + /* + * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users + */ + if (suser_xxx(p->p_ucred, 0, 0)) + SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; + /* + * Get vnode to be covered + */ + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (SCARG(uap, flags) & MNT_UPDATE) { + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + return (EINVAL); + } + mp = vp->v_mount; + flag = mp->mnt_flag; + flag2 = mp->mnt_kern_flag; + /* + * We only allow the filesystem to be reloaded if it + * is currently mounted read-only. + */ + if ((SCARG(uap, flags) & MNT_RELOAD) && + ((mp->mnt_flag & MNT_RDONLY) == 0)) { + vput(vp); + return (EOPNOTSUPP); /* Needs translation */ + } + /* + * Only mounts made within a prison can be updated from + * a prison. + */ + if (p->p_prison && p->p_prison != mp->mnt_prison) { + vput(vp); + return (EPERM); + } + /* + * Only root, or the user that did the original mount is + * permitted to update it. + */ + if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && + (error = suser(p))) { + vput(vp); + return (error); + } + if (vfs_busy(mp, LK_NOWAIT, 0, p)) { + vput(vp); + return (EBUSY); + } + simple_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || + vp->v_mountedhere != NULL) { + simple_unlock(&vp->v_interlock); + vfs_unbusy(mp, p); + vput(vp); + return (EBUSY); + } + vp->v_flag |= VMOUNT; + simple_unlock(&vp->v_interlock); + mp->mnt_flag |= + SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); + VOP_UNLOCK(vp, 0, p); + goto update; + } + /* + * If the user is not root, ensure that they own the directory + * onto which we are attempting to mount. + */ + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || + (va.va_uid != p->p_ucred->cr_uid && + (error = suser(p)))) { + vput(vp); + return (error); + } + if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) { + vput(vp); + return (error); + } + if (vp->v_type != VDIR) { + vput(vp); + return (ENOTDIR); + } +#ifdef COMPAT_43 + /* + * Historically filesystem types were identified by number. If we + * get an integer for the filesystem type instead of a string, we + * check to see if it matches one of the historic filesystem types. + */ + fstypenum = (uintptr_t)SCARG(uap, type); + if (fstypenum < maxvfsconf) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == fstypenum) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } + strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); + } else +#endif /* COMPAT_43 */ + if ((error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) != 0) { + vput(vp); + return (error); + } + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) { + linker_file_t lf; + + /* Only load modules for root (very important!) */ + if ((error = suser(p)) != 0) { + vput(vp); + return error; + } + error = linker_load_file(fstypename, &lf); + if (error || lf == NULL) { + vput(vp); + if (lf == NULL) + error = ENODEV; + return error; + } + lf->userrefs++; + /* lookup again, see if the VFS was loaded */ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) { + lf->userrefs--; + linker_file_unload(lf); + vput(vp); + return (ENODEV); + } + } + simple_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || + vp->v_mountedhere != NULL) { + simple_unlock(&vp->v_interlock); + vput(vp); + return (EBUSY); + } + vp->v_flag |= VMOUNT; + simple_unlock(&vp->v_interlock); + + /* + * Allocate and initialize the filesystem. + */ + mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + TAILQ_INIT(&mp->mnt_nvnodelist); + TAILQ_INIT(&mp->mnt_reservedvnlist); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + if (p->p_prison) + mp->mnt_prison = p->p_prison; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + mp->mnt_vnodecovered = vp; + mp->mnt_stat.f_owner = p->p_ucred->cr_uid; + mp->mnt_iosize_max = DFLTPHYS; + VOP_UNLOCK(vp, 0, p); +update: + /* + * Set the mount level flags. + */ + if (SCARG(uap, flags) & MNT_RDONLY) + mp->mnt_flag |= MNT_RDONLY; + else if (mp->mnt_flag & MNT_RDONLY) + mp->mnt_kern_flag |= MNTK_WANTRDWR; + mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | + MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | + MNT_NOSYMFOLLOW | MNT_IGNORE | + MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); + mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | + MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | + MNT_NOSYMFOLLOW | MNT_IGNORE | + MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); + /* + * Mount the filesystem. + * XXX The final recipients of VFS_MOUNT just overwrite the ndp they + * get. No freeing of cn_pnbuf. + */ + error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); + if (mp->mnt_flag & MNT_UPDATE) { + if (mp->mnt_kern_flag & MNTK_WANTRDWR) + mp->mnt_flag &= ~MNT_RDONLY; + mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); + mp->mnt_kern_flag &=~ MNTK_WANTRDWR; + if (error) { + mp->mnt_flag = flag; + mp->mnt_kern_flag = flag2; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } + vfs_unbusy(mp, p); + simple_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + simple_unlock(&vp->v_interlock); + vrele(vp); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + /* + * Put the new filesystem on the mount list after root. + */ + cache_purge(vp); + if (!error) { + simple_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + vp->v_mountedhere = mp; + simple_unlock(&vp->v_interlock); + simple_lock(&mountlist_slock); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + simple_unlock(&mountlist_slock); + checkdirs(vp); + VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); + vfs_unbusy(mp, p); + if ((error = VFS_START(mp, 0, p)) != 0) + vrele(vp); + } else { + simple_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + simple_unlock(&vp->v_interlock); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); + free((caddr_t)mp, M_MOUNT); + vput(vp); + } + return (error); +} + +/* + * Scan all active processes to see if any of them have a current + * or root directory onto which the new filesystem has just been + * mounted. If so, replace them with the new mount point. + */ +static void +checkdirs(olddp) + struct vnode *olddp; +{ + struct filedesc *fdp; + struct vnode *newdp; + struct proc *p; + + if (olddp->v_usecount == 1) + return; + if (VFS_ROOT(olddp->v_mountedhere, &newdp)) + panic("mount: lost mount"); + LIST_FOREACH(p, &allproc, p_list) { + fdp = p->p_fd; + if (fdp->fd_cdir == olddp) { + vrele(fdp->fd_cdir); + VREF(newdp); + fdp->fd_cdir = newdp; + } + if (fdp->fd_rdir == olddp) { + vrele(fdp->fd_rdir); + VREF(newdp); + fdp->fd_rdir = newdp; + } + } + if (rootvnode == olddp) { + vrele(rootvnode); + VREF(newdp); + rootvnode = newdp; + } + vput(newdp); +} + +/* + * Unmount a file system. + * + * Note: unmount takes a path to the vnode mounted on as argument, + * not special file (as before). + */ +#ifndef _SYS_SYSPROTO_H_ +struct unmount_args { + char *path; + int flags; +}; +#endif +/* ARGSUSED */ +int +unmount(p, uap) + struct proc *p; + register struct unmount_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; +{ + register struct vnode *vp; + struct mount *mp; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + NDFREE(&nd, NDF_ONLY_PNBUF); + mp = vp->v_mount; + + /* + * Only mounts made within a prison can be umounted from a prison. + */ + if (p->p_prison && p->p_prison != mp->mnt_prison) { + vput(vp); + return (EPERM); + } + /* + * Only root, or the user that did the original mount is + * permitted to unmount this filesystem. + */ + if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && + (error = suser(p))) { + vput(vp); + return (error); + } + + /* + * Don't allow unmounting the root file system. + */ + if (mp->mnt_flag & MNT_ROOTFS) { + vput(vp); + return (EINVAL); + } + + /* + * Must be the root of the filesystem + */ + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + return (EINVAL); + } + vput(vp); + return (dounmount(mp, SCARG(uap, flags), p)); +} + +/* + * Do the actual file system unmount. + */ +int +dounmount(mp, flags, p) + register struct mount *mp; + int flags; + struct proc *p; +{ + struct vnode *coveredvp; + int error; + int async_flag; + + simple_lock(&mountlist_slock); + if (mp->mnt_kern_flag & MNTK_UNMOUNT) { + simple_unlock(&mountlist_slock); + return (EBUSY); + } + mp->mnt_kern_flag |= MNTK_UNMOUNT; + /* Allow filesystems to detect that a forced unmount is in progress. */ + if (flags & MNT_FORCE) + mp->mnt_kern_flag |= MNTK_UNMOUNTF; + error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK | + ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_slock, p); + if (error) { + mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); + if (mp->mnt_kern_flag & MNTK_MWAIT) + wakeup((caddr_t)mp); + return (error); + } + + if (mp->mnt_flag & MNT_EXPUBLIC) + vfs_setpublicfs(NULL, NULL, NULL); + + vfs_msync(mp, MNT_WAIT); + async_flag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &=~ MNT_ASYNC; + cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + if (((mp->mnt_flag & MNT_RDONLY) || + (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || + (flags & MNT_FORCE)) + error = VFS_UNMOUNT(mp, flags, p); + simple_lock(&mountlist_slock); + if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); + mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); + mp->mnt_flag |= async_flag; + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, + &mountlist_slock, p); + if (mp->mnt_kern_flag & MNTK_MWAIT) + wakeup((caddr_t)mp); + return (error); + } + TAILQ_REMOVE(&mountlist, mp, mnt_list); + if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { + coveredvp->v_mountedhere = (struct mount *)0; + vrele(coveredvp); + } + mp->mnt_vfc->vfc_refcount--; + if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) + panic("unmount: dangling vnode"); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); + if (mp->mnt_kern_flag & MNTK_MWAIT) + wakeup((caddr_t)mp); + free((caddr_t)mp, M_MOUNT); + return (0); +} + +/* + * Sync each mounted filesystem. + */ +#ifndef _SYS_SYSPROTO_H_ +struct sync_args { + int dummy; +}; +#endif + +#ifdef DEBUG +static int syncprt = 0; +SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); +#endif + +/* ARGSUSED */ +int +sync(p, uap) + struct proc *p; + struct sync_args *uap; +{ + register struct mount *mp, *nmp; + int asyncflag; + + simple_lock(&mountlist_slock); + for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { + if (p->p_prison && p->p_prison != mp->mnt_prison) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + vfs_msync(mp, MNT_NOWAIT); + VFS_SYNC(mp, MNT_NOWAIT, + ((p != NULL) ? p->p_ucred : NOCRED), p); + mp->mnt_flag |= asyncflag; + } + simple_lock(&mountlist_slock); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, p); + } + simple_unlock(&mountlist_slock); +#if 0 +/* + * XXX don't call vfs_bufstats() yet because that routine + * was not imported in the Lite2 merge. + */ +#ifdef DIAGNOSTIC + if (syncprt) + vfs_bufstats(); +#endif /* DIAGNOSTIC */ +#endif + return (0); +} + +/* + * Change filesystem quotas. + */ +#ifndef _SYS_SYSPROTO_H_ +struct quotactl_args { + char *path; + int cmd; + int uid; + caddr_t arg; +}; +#endif +/* ARGSUSED */ +int +quotactl(p, uap) + struct proc *p; + register struct quotactl_args /* { + syscallarg(char *) path; + syscallarg(int) cmd; + syscallarg(int) uid; + syscallarg(caddr_t) arg; + } */ *uap; +{ + register struct mount *mp; + int error; + struct nameidata nd; + + if (p->p_prison && !prison_allows(p->p_prison, JAIL_QUOTAS)) + return (EPERM); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + mp = nd.ni_vp->v_mount; + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(nd.ni_vp); + if (p->p_prison && p->p_prison != mp->mnt_prison) + return (EPERM); + return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), + SCARG(uap, arg), p)); +} + +/* + * Get filesystem statistics. + */ +#ifndef _SYS_SYSPROTO_H_ +struct statfs_args { + char *path; + struct statfs *buf; +}; +#endif +/* ARGSUSED */ +int +statfs(p, uap) + struct proc *p; + register struct statfs_args /* { + syscallarg(char *) path; + syscallarg(struct statfs *) buf; + } */ *uap; +{ + register struct mount *mp; + register struct statfs *sp; + int error; + struct nameidata nd; + struct statfs sb; + char mntonname[MNAMELEN]; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + mp = nd.ni_vp->v_mount; + sp = &mp->mnt_stat; + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(nd.ni_vp); + if (p->p_prison && !prison_valid_mount(p->p_prison, mp, mntonname)) + return (EPERM); + error = VFS_STATFS(mp, sp, p); + if (error) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (suser_xxx(p->p_ucred, 0, 0)) { + bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); + sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; + sp = &sb; + } + if (p->p_prison && *mntonname) { + if (sp != &sb) { + bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); + sp = &sb; + } + bcopy(mntonname, sb.f_mntonname, sizeof(mntonname)); + } + return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); +} + +/* + * Get filesystem statistics. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fstatfs_args { + int fd; + struct statfs *buf; +}; +#endif +/* ARGSUSED */ +int +fstatfs(p, uap) + struct proc *p; + register struct fstatfs_args /* { + syscallarg(int) fd; + syscallarg(struct statfs *) buf; + } */ *uap; +{ + struct file *fp; + struct mount *mp; + register struct statfs *sp; + int error; + struct statfs sb; + char mntonname[MNAMELEN]; + + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + mp = ((struct vnode *)fp->f_data)->v_mount; + if (mp == NULL) + return (EBADF); + if (p->p_prison && !prison_valid_mount(p->p_prison, mp, mntonname)) + return (EPERM); + sp = &mp->mnt_stat; + error = VFS_STATFS(mp, sp, p); + if (error) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (suser_xxx(p->p_ucred, 0, 0)) { + bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); + sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; + sp = &sb; + } + if (p->p_prison && *mntonname) { + if (sp != &sb) { + bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); + sp = &sb; + } + bcopy(mntonname, sb.f_mntonname, sizeof(mntonname)); + } + return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); +} + +/* + * Get statistics on all filesystems. + */ +#ifndef _SYS_SYSPROTO_H_ +struct getfsstat_args { + struct statfs *buf; + long bufsize; + int flags; +}; +#endif +int +getfsstat(p, uap) + struct proc *p; + register struct getfsstat_args /* { + syscallarg(struct statfs *) buf; + syscallarg(long) bufsize; + syscallarg(int) flags; + } */ *uap; +{ + register struct mount *mp, *nmp; + struct statfs *sp, sb; + caddr_t sfsp; + long count, maxcount, error; + char mntonname[MNAMELEN]; + + maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); + sfsp = (caddr_t)SCARG(uap, buf); + count = 0; + simple_lock(&mountlist_slock); + for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { + if (p->p_prison && + !prison_valid_mount(p->p_prison, mp, mntonname)) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if (sfsp && count < maxcount) { + sp = &mp->mnt_stat; + /* + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. + */ + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || + (SCARG(uap, flags) & MNT_WAIT)) && + (error = VFS_STATFS(mp, sp, p))) { + simple_lock(&mountlist_slock); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, p); + continue; + } + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (p->p_prison && *mntonname) { + bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); + bcopy(mntonname, sb.f_mntonname, + sizeof(mntonname)); + sp = &sb; + } + error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); + if (error) { + vfs_unbusy(mp, p); + return (error); + } + + sfsp += sizeof(*sp); + } + count++; + simple_lock(&mountlist_slock); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, p); + } + simple_unlock(&mountlist_slock); + if (sfsp && count > maxcount) + p->p_retval[0] = maxcount; + else + p->p_retval[0] = count; + return (0); +} + +/* + * Change current working directory to a given file descriptor. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fchdir_args { + int fd; +}; +#endif +/* ARGSUSED */ +int +fchdir(p, uap) + struct proc *p; + struct fchdir_args /* { + syscallarg(int) fd; + } */ *uap; +{ + register struct filedesc *fdp = p->p_fd; + struct vnode *vp, *tdp; + struct mount *mp; + struct file *fp; + int error; + + if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_type != VDIR) + error = ENOTDIR; + else + error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); + while (!error && (mp = vp->v_mountedhere) != NULL) { + if (vfs_busy(mp, 0, 0, p)) + continue; + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) + break; + vput(vp); + vp = tdp; + } + if (error) { + vput(vp); + return (error); + } + VOP_UNLOCK(vp, 0, p); + vrele(fdp->fd_cdir); + fdp->fd_cdir = vp; + return (0); +} + +/* + * Change current working directory (``.''). + */ +#ifndef _SYS_SYSPROTO_H_ +struct chdir_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +chdir(p, uap) + struct proc *p; + struct chdir_args /* { + syscallarg(char *) path; + } */ *uap; +{ + register struct filedesc *fdp = p->p_fd; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = change_dir(&nd, p)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(fdp->fd_cdir); + fdp->fd_cdir = nd.ni_vp; + return (0); +} + +/* + * Helper function for raised chroot(2) security function: Refuse if + * any filedescriptors are open directories. + */ +static int +chroot_refuse_vdir_fds(fdp) + struct filedesc *fdp; +{ + struct vnode *vp; + struct file *fp; + int error; + int fd; + + for (fd = 0; fd < fdp->fd_nfiles ; fd++) { + error = getvnode(fdp, fd, &fp); + if (error) + continue; + vp = (struct vnode *)fp->f_data; + if (vp->v_type != VDIR) + continue; + return(EPERM); + } + return (0); +} + +/* + * This sysctl determines if we will allow a process to chroot(2) if it + * has a directory open: + * 0: disallowed for all processes. + * 1: allowed for processes that were not already chroot(2)'ed. + * 2: allowed for all processes. + */ + +static int chroot_allow_open_directories = 1; + +SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, + &chroot_allow_open_directories, 0, ""); + +/* + * Change notion of root (``/'') directory. + */ +#ifndef _SYS_SYSPROTO_H_ +struct chroot_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +chroot(p, uap) + struct proc *p; + struct chroot_args /* { + syscallarg(char *) path; + } */ *uap; +{ + register struct filedesc *fdp = p->p_fd; + int error; + struct nameidata nd; + + error = suser_xxx(0, p, PRISON_ROOT); + if (error) + return (error); + if (chroot_allow_open_directories == 0 || + (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) + error = chroot_refuse_vdir_fds(fdp); + if (error) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = change_dir(&nd, p)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(fdp->fd_rdir); + fdp->fd_rdir = nd.ni_vp; + if (!fdp->fd_jdir) { + fdp->fd_jdir = nd.ni_vp; + VREF(fdp->fd_jdir); + } + return (0); +} + +/* + * Common routine for chroot and chdir. + */ +static int +change_dir(ndp, p) + register struct nameidata *ndp; + struct proc *p; +{ + struct vnode *vp; + int error; + + error = namei(ndp); + if (error) + return (error); + vp = ndp->ni_vp; + if (vp->v_type != VDIR) + error = ENOTDIR; + else + error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); + if (error) + vput(vp); + else + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * Check permissions, allocate an open file structure, + * and call the device open routine if any. + */ +#ifndef _SYS_SYSPROTO_H_ +struct open_args { + char *path; + int flags; + int mode; +}; +#endif +int +open(p, uap) + struct proc *p; + register struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ *uap; +{ + register struct filedesc *fdp = p->p_fd; + register struct file *fp; + register struct vnode *vp; + int cmode, flags, oflags; + struct file *nfp; + int type, indx, error; + struct flock lf; + struct nameidata nd; + + oflags = SCARG(uap, flags); + if ((oflags & O_ACCMODE) == O_ACCMODE) + return (EINVAL); + flags = FFLAGS(oflags); + error = falloc(p, &nfp, &indx); + if (error) + return (error); + fp = nfp; + cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + p->p_dupfd = -indx - 1; /* XXX check for fdopen */ + /* + * Bump the ref count to prevent another process from closing + * the descriptor while we are blocked in vn_open() + */ + fhold(fp); + error = vn_open(&nd, flags, cmode); + if (error) { + /* + * release our own reference + */ + fdrop(fp, p); + + /* + * handle special fdopen() case. bleh. dupfdopen() is + * responsible for dropping the old contents of ofiles[indx] + * if it succeeds. + */ + if ((error == ENODEV || error == ENXIO) && + p->p_dupfd >= 0 && /* XXX from fdopen */ + (error = + dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) { + p->p_retval[0] = indx; + return (0); + } + /* + * Clean up the descriptor, but only if another thread hadn't + * replaced or closed it. + */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + + if (error == ERESTART) + error = EINTR; + return (error); + } + p->p_dupfd = 0; + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + + /* + * There should be 2 references on the file, one from the descriptor + * table, and one for us. + * + * Handle the case where someone closed the file (via its file + * descriptor) while we were blocked. The end result should look + * like opening the file succeeded but it was immediately closed. + */ + if (fp->f_count == 1) { + KASSERT(fdp->fd_ofiles[indx] != fp, + ("Open file descriptor lost all refs")); + VOP_UNLOCK(vp, 0, p); + vn_close(vp, flags & FMASK, fp->f_cred, p); + fdrop(fp, p); + p->p_retval[0] = indx; + return 0; + } + + fp->f_data = (caddr_t)vp; + fp->f_flag = flags & FMASK; + fp->f_ops = &vnops; + fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); + if (flags & (O_EXLOCK | O_SHLOCK)) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + if (flags & O_EXLOCK) + lf.l_type = F_WRLCK; + else + lf.l_type = F_RDLCK; + type = F_FLOCK; + if ((flags & FNONBLOCK) == 0) + type |= F_WAIT; + VOP_UNLOCK(vp, 0, p); + if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { + /* + * lock request failed. Normally close the descriptor + * but handle the case where someone might have dup()d + * it when we weren't looking. One reference is + * owned by the descriptor array, the other by us. + */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + fdrop(fp, p); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + fp->f_flag |= FHASLOCK; + } + /* assert that vn_open created a backing object if one is needed */ + KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, + ("open: vmio vnode has no backing object after vn_open")); + VOP_UNLOCK(vp, 0, p); + + /* + * release our private reference, leaving the one associated with the + * descriptor table intact. + */ + fdrop(fp, p); + p->p_retval[0] = indx; + return (0); +} + +#ifdef COMPAT_43 +/* + * Create a file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct ocreat_args { + char *path; + int mode; +}; +#endif +int +ocreat(p, uap) + struct proc *p; + register struct ocreat_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ nuap; + + struct nameidata nd; + int retVal = 0; + int error; + register struct vnode *vp; + + SCARG(&nuap, path) = SCARG(uap, path); + SCARG(&nuap, mode) = SCARG(uap, mode); + SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); + if ((error = namei(&nd)) != 0) + { + printf ("namei returned error %d in ocreat\n", error); + } + vp = nd.ni_dvp; + shd_write_entry (vp, "create"); + retVal = open(p, &nuap); + if (retVal) + { + printf ("open returned error %d in ocreat\n", retVal); + } + shd_write_exit (vp, "create"); + NDFREE(&nd, NDF_ONLY_PNBUF); + return retVal; +} +#endif /* COMPAT_43 */ + +/* + * Create a special file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct mknod_args { + char *path; + int mode; + int dev; +}; +#endif +/* ARGSUSED */ +int +mknod(p, uap) + struct proc *p; + register struct mknod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + syscallarg(int) dev; + } */ *uap; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + int whiteout = 0; + struct nameidata nd; + + switch (SCARG(uap, mode) & S_IFMT) { + case S_IFCHR: + case S_IFBLK: + error = suser(p); + break; + default: + error = suser_xxx(0, p, PRISON_ROOT); + break; + } + if (error) + return (error); + bwillwrite(); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + if (vp != NULL) + error = EEXIST; + else { + VATTR_NULL(&vattr); + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; + vattr.va_rdev = SCARG(uap, dev); + whiteout = 0; + + switch (SCARG(uap, mode) & S_IFMT) { + case S_IFMT: /* used by badsect to flag bad sectors */ + vattr.va_type = VBAD; + break; + case S_IFCHR: + vattr.va_type = VCHR; + break; + case S_IFBLK: + vattr.va_type = VBLK; + break; + case S_IFWHT: + whiteout = 1; + break; + default: + error = EINVAL; + break; + } + } + if (!error) { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (whiteout) + { + shd_write_entry (nd.ni_dvp, "mknod"); + error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); + shd_write_exit (nd.ni_dvp, "mknod"); + } + else { + shd_write_entry (nd.ni_dvp, "mknod"); + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, + &nd.ni_cnd, &vattr); + shd_write_exit (nd.ni_dvp, "mknod"); + if (error == 0) + vput(nd.ni_vp); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + } else { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp) + vrele(vp); + } + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); + return (error); +} + +/* + * Create a named pipe. + */ +#ifndef _SYS_SYSPROTO_H_ +struct mkfifo_args { + char *path; + int mode; +}; +#endif +/* ARGSUSED */ +int +mkfifo(p, uap) + struct proc *p; + register struct mkfifo_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + struct vattr vattr; + int error; + struct nameidata nd; + + bwillwrite(); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + if (nd.ni_vp != NULL) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(nd.ni_vp); + return (EEXIST); + } + VATTR_NULL(&vattr); + vattr.va_type = VFIFO; + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + if (error == 0) + vput(nd.ni_vp); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + return (error); +} + +/* + * Make a hard file link. + */ +#ifndef _SYS_SYSPROTO_H_ +struct link_args { + char *path; + char *link; +}; +#endif +/* ARGSUSED */ +int +link(p, uap) + struct proc *p; + register struct link_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; +{ + register struct vnode *vp; + struct nameidata nd; + int error; + + bwillwrite(); + NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (vp->v_type == VDIR) + error = EPERM; /* POSIX */ + else { + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); + error = namei(&nd); + if (!error) { + if (nd.ni_vp != NULL) { + if (nd.ni_vp) + vrele(nd.ni_vp); + error = EEXIST; + } else { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, + LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + shd_write_entry (vp, "link"); + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + shd_write_entry (vp, "link"); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + } + } + vrele(vp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); + return (error); +} + +/* + * Make a symbolic link. + */ +#ifndef _SYS_SYSPROTO_H_ +struct symlink_args { + char *path; + char *link; +}; +#endif +/* ARGSUSED */ +int +symlink(p, uap) + struct proc *p; + register struct symlink_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; +{ + struct vattr vattr; + char *path; + int error; + struct nameidata nd; + + path = zalloc(namei_zone); + if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0) + goto out; + bwillwrite(); + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); + if ((error = namei(&nd)) != 0) + goto out; + if (nd.ni_vp) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(nd.ni_vp); + error = EEXIST; + goto out; + } + VATTR_NULL(&vattr); + vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + shd_write_entry (nd.ni_dvp, "symlink"); + error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); + shd_write_exit (nd.ni_dvp, "symlink"); + NDFREE(&nd, NDF_ONLY_PNBUF); + if (error == 0) + vput(nd.ni_vp); + vput(nd.ni_dvp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); +out: + zfree(namei_zone, path); + return (error); +} + +/* + * Delete a whiteout from the filesystem. + */ +/* ARGSUSED */ +int +undelete(p, uap) + struct proc *p; + register struct undelete_args /* { + syscallarg(char *) path; + } */ *uap; +{ + int error; + struct nameidata nd; + + bwillwrite(); + NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, + SCARG(uap, path), p); + error = namei(&nd); + if (error) + return (error); + + if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vrele(nd.ni_vp); + return (EEXIST); + } + + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); + return (error); +} + +/* + * Delete a name from the filesystem. + */ +#ifndef _SYS_SYSPROTO_H_ +struct unlink_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +unlink(p, uap) + struct proc *p; + struct unlink_args /* { + syscallarg(char *) path; + } */ *uap; +{ + register struct vnode *vp; + int error; + struct nameidata nd; + + bwillwrite(); + NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + + if (vp->v_type == VDIR) + error = EPERM; /* POSIX */ + else { + /* + * The root of a mounted filesystem cannot be deleted. + * + * XXX: can this only be a VDIR case? + */ + if (vp->v_flag & VROOT) + error = EBUSY; + } + + if (!error) { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + shd_write_entry (vp, "unlink"); + error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); + shd_write_exit (vp, "unlink"); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); + return (error); +} + +/* + * Reposition read/write file offset. + */ +#ifndef _SYS_SYSPROTO_H_ +struct lseek_args { + int fd; + int pad; + off_t offset; + int whence; +}; +#endif +int +lseek(p, uap) + struct proc *p; + register struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ *uap; +{ + struct ucred *cred = p->p_ucred; + register struct filedesc *fdp = p->p_fd; + register struct file *fp; + struct vattr vattr; + int error; + + if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE) + return (ESPIPE); + switch (SCARG(uap, whence)) { + case L_INCR: + fp->f_offset += SCARG(uap, offset); + break; + case L_XTND: + error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); + if (error) + return (error); + fp->f_offset = SCARG(uap, offset) + vattr.va_size; + break; + case L_SET: + fp->f_offset = SCARG(uap, offset); + break; + default: + return (EINVAL); + } + *(off_t *)(p->p_retval) = fp->f_offset; + return (0); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Reposition read/write file offset. + */ +#ifndef _SYS_SYSPROTO_H_ +struct olseek_args { + int fd; + long offset; + int whence; +}; +#endif +int +olseek(p, uap) + struct proc *p; + register struct olseek_args /* { + syscallarg(int) fd; + syscallarg(long) offset; + syscallarg(int) whence; + } */ *uap; +{ + struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ nuap; + int error; + + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, offset) = SCARG(uap, offset); + SCARG(&nuap, whence) = SCARG(uap, whence); + error = lseek(p, &nuap); + return (error); +} +#endif /* COMPAT_43 */ + +/* + * Check access permissions. + */ +#ifndef _SYS_SYSPROTO_H_ +struct access_args { + char *path; + int flags; +}; +#endif +int +access(p, uap) + struct proc *p; + register struct access_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; +{ + struct ucred *cred, *tmpcred; + register struct vnode *vp; + int error, flags; + struct nameidata nd; + + cred = p->p_ucred; + /* + * Create and modify a temporary credential instead of one that + * is potentially shared. This could also mess up socket + * buffer accounting which can run in an interrupt context. + */ + tmpcred = crdup(cred); + tmpcred->cr_uid = p->p_cred->p_ruid; + tmpcred->cr_groups[0] = p->p_cred->p_rgid; + p->p_ucred = tmpcred; + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + goto out1; + vp = nd.ni_vp; + + /* Flags == 0 means only check for existence. */ + if (SCARG(uap, flags)) { + flags = 0; + if (SCARG(uap, flags) & R_OK) + flags |= VREAD; + if (SCARG(uap, flags) & W_OK) + flags |= VWRITE; + if (SCARG(uap, flags) & X_OK) + flags |= VEXEC; + if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) + error = VOP_ACCESS(vp, flags, tmpcred, p); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(vp); +out1: + p->p_ucred = cred; + crfree(tmpcred); + return (error); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Get file status; this version follows links. + */ +#ifndef _SYS_SYSPROTO_H_ +struct ostat_args { + char *path; + struct ostat *ub; +}; +#endif +/* ARGSUSED */ +int +ostat(p, uap) + struct proc *p; + register struct ostat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; +{ + struct stat sb; + struct ostat osb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_stat(nd.ni_vp, &sb, p); + vput(nd.ni_vp); + if (error) + return (error); + cvtstat(&sb, &osb); + error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); + return (error); +} + +/* + * Get file status; this version does not follow links. + */ +#ifndef _SYS_SYSPROTO_H_ +struct olstat_args { + char *path; + struct ostat *ub; +}; +#endif +/* ARGSUSED */ +int +olstat(p, uap) + struct proc *p; + register struct olstat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; +{ + struct vnode *vp; + struct stat sb; + struct ostat osb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + error = vn_stat(vp, &sb, p); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(vp); + if (error) + return (error); + cvtstat(&sb, &osb); + error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); + return (error); +} + +/* + * Convert from an old to a new stat structure. + */ +void +cvtstat(st, ost) + struct stat *st; + struct ostat *ost; +{ + + ost->st_dev = st->st_dev; + ost->st_ino = st->st_ino; + ost->st_mode = st->st_mode; + ost->st_nlink = st->st_nlink; + ost->st_uid = st->st_uid; + ost->st_gid = st->st_gid; + ost->st_rdev = st->st_rdev; + if (st->st_size < (quad_t)1 << 32) + ost->st_size = st->st_size; + else + ost->st_size = -2; + ost->st_atime = st->st_atime; + ost->st_mtime = st->st_mtime; + ost->st_ctime = st->st_ctime; + ost->st_blksize = st->st_blksize; + ost->st_blocks = st->st_blocks; + ost->st_flags = st->st_flags; + ost->st_gen = st->st_gen; +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* + * Get file status; this version follows links. + */ +#ifndef _SYS_SYSPROTO_H_ +struct stat_args { + char *path; + struct stat *ub; +}; +#endif +/* ARGSUSED */ +int +stat(p, uap) + struct proc *p; + register struct stat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; +{ + struct stat sb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + error = vn_stat(nd.ni_vp, &sb, p); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_vp); + if (error) + return (error); + error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); + return (error); +} + +/* + * Get file status; this version does not follow links. + */ +#ifndef _SYS_SYSPROTO_H_ +struct lstat_args { + char *path; + struct stat *ub; +}; +#endif +/* ARGSUSED */ +int +lstat(p, uap) + struct proc *p; + register struct lstat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; +{ + int error; + struct vnode *vp; + struct stat sb; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + error = vn_stat(vp, &sb, p); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(vp); + if (error) + return (error); + error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); + return (error); +} + +void +cvtnstat(sb, nsb) + struct stat *sb; + struct nstat *nsb; +{ + nsb->st_dev = sb->st_dev; + nsb->st_ino = sb->st_ino; + nsb->st_mode = sb->st_mode; + nsb->st_nlink = sb->st_nlink; + nsb->st_uid = sb->st_uid; + nsb->st_gid = sb->st_gid; + nsb->st_rdev = sb->st_rdev; + nsb->st_atimespec = sb->st_atimespec; + nsb->st_mtimespec = sb->st_mtimespec; + nsb->st_ctimespec = sb->st_ctimespec; + nsb->st_size = sb->st_size; + nsb->st_blocks = sb->st_blocks; + nsb->st_blksize = sb->st_blksize; + nsb->st_flags = sb->st_flags; + nsb->st_gen = sb->st_gen; + nsb->st_qspare[0] = sb->st_qspare[0]; + nsb->st_qspare[1] = sb->st_qspare[1]; +} + +#ifndef _SYS_SYSPROTO_H_ +struct nstat_args { + char *path; + struct nstat *ub; +}; +#endif +/* ARGSUSED */ +int +nstat(p, uap) + struct proc *p; + register struct nstat_args /* { + syscallarg(char *) path; + syscallarg(struct nstat *) ub; + } */ *uap; +{ + struct stat sb; + struct nstat nsb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_stat(nd.ni_vp, &sb, p); + vput(nd.ni_vp); + if (error) + return (error); + cvtnstat(&sb, &nsb); + error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb)); + return (error); +} + +/* + * Get file status; this version does not follow links. + */ +#ifndef _SYS_SYSPROTO_H_ +struct lstat_args { + char *path; + struct stat *ub; +}; +#endif +/* ARGSUSED */ +int +nlstat(p, uap) + struct proc *p; + register struct nlstat_args /* { + syscallarg(char *) path; + syscallarg(struct nstat *) ub; + } */ *uap; +{ + int error; + struct vnode *vp; + struct stat sb; + struct nstat nsb; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_stat(vp, &sb, p); + vput(vp); + if (error) + return (error); + cvtnstat(&sb, &nsb); + error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb)); + return (error); +} + +/* + * Get configurable pathname variables. + */ +#ifndef _SYS_SYSPROTO_H_ +struct pathconf_args { + char *path; + int name; +}; +#endif +/* ARGSUSED */ +int +pathconf(p, uap) + struct proc *p; + register struct pathconf_args /* { + syscallarg(char *) path; + syscallarg(int) name; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval); + vput(nd.ni_vp); + return (error); +} + +/* + * Return target name of a symbolic link. + */ +#ifndef _SYS_SYSPROTO_H_ +struct readlink_args { + char *path; + char *buf; + int count; +}; +#endif +/* ARGSUSED */ +int +readlink(p, uap) + struct proc *p; + register struct readlink_args /* { + syscallarg(char *) path; + syscallarg(char *) buf; + syscallarg(int) count; + } */ *uap; +{ + register struct vnode *vp; + struct iovec aiov; + struct uio auio; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (vp->v_type != VLNK) + error = EINVAL; + else { + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_resid = SCARG(uap, count); + error = VOP_READLINK(vp, &auio, p->p_ucred); + } + vput(vp); + p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; + return (error); +} + +static int +setfflags(p, vp, flags) + struct proc *p; + struct vnode *vp; + int flags; +{ + int error; + struct vattr vattr; + + /* + * Prevent non-root users from setting flags on devices. When + * a device is reused, users can retain ownership of the device + * if they are allowed to set flags and programs assume that + * chown can't fail when done as root. + */ + if ((vp->v_type == VCHR || vp->v_type == VBLK) && + ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0)) + return (error); + + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_flags = flags; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * Change flags of a file given a path name. + */ +#ifndef _SYS_SYSPROTO_H_ +struct chflags_args { + char *path; + int flags; +}; +#endif +/* ARGSUSED */ +int +chflags(p, uap) + struct proc *p; + register struct chflags_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfflags(p, nd.ni_vp, SCARG(uap, flags)); + vrele(nd.ni_vp); + return error; +} + +/* + * Change flags of a file given a file descriptor. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fchflags_args { + int fd; + int flags; +}; +#endif +/* ARGSUSED */ +int +fchflags(p, uap) + struct proc *p; + register struct fchflags_args /* { + syscallarg(int) fd; + syscallarg(int) flags; + } */ *uap; +{ + struct file *fp; + int error; + + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + return setfflags(p, (struct vnode *) fp->f_data, SCARG(uap, flags)); +} + +static int +setfmode(p, vp, mode) + struct proc *p; + struct vnode *vp; + int mode; +{ + int error; + struct vattr vattr; + + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_mode = mode & ALLPERMS; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + VOP_UNLOCK(vp, 0, p); + return error; +} + +/* + * Change mode of a file given path name. + */ +#ifndef _SYS_SYSPROTO_H_ +struct chmod_args { + char *path; + int mode; +}; +#endif +/* ARGSUSED */ +int +chmod(p, uap) + struct proc *p; + register struct chmod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfmode(p, nd.ni_vp, SCARG(uap, mode)); + vrele(nd.ni_vp); + return error; +} + +/* + * Change mode of a file given path name (don't follow links.) + */ +#ifndef _SYS_SYSPROTO_H_ +struct lchmod_args { + char *path; + int mode; +}; +#endif +/* ARGSUSED */ +int +lchmod(p, uap) + struct proc *p; + register struct lchmod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfmode(p, nd.ni_vp, SCARG(uap, mode)); + vrele(nd.ni_vp); + return error; +} + +/* + * Change mode of a file given a file descriptor. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fchmod_args { + int fd; + int mode; +}; +#endif +/* ARGSUSED */ +int +fchmod(p, uap) + struct proc *p; + register struct fchmod_args /* { + syscallarg(int) fd; + syscallarg(int) mode; + } */ *uap; +{ + struct file *fp; + int error; + + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + return setfmode(p, (struct vnode *)fp->f_data, SCARG(uap, mode)); +} + +static int +setfown(p, vp, uid, gid) + struct proc *p; + struct vnode *vp; + uid_t uid; + gid_t gid; +{ + int error; + struct vattr vattr; + + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_uid = uid; + vattr.va_gid = gid; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + VOP_UNLOCK(vp, 0, p); + return error; +} + +/* + * Set ownership given a path name. + */ +#ifndef _SYS_SYSPROTO_H_ +struct chown_args { + char *path; + int uid; + int gid; +}; +#endif +/* ARGSUSED */ +int +chown(p, uap) + struct proc *p; + register struct chown_args /* { + syscallarg(char *) path; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid)); + vrele(nd.ni_vp); + return (error); +} + +/* + * Set ownership given a path name, do not cross symlinks. + */ +#ifndef _SYS_SYSPROTO_H_ +struct lchown_args { + char *path; + int uid; + int gid; +}; +#endif +/* ARGSUSED */ +int +lchown(p, uap) + struct proc *p; + register struct lchown_args /* { + syscallarg(char *) path; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid)); + vrele(nd.ni_vp); + return (error); +} + +/* + * Set ownership given a file descriptor. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fchown_args { + int fd; + int uid; + int gid; +}; +#endif +/* ARGSUSED */ +int +fchown(p, uap) + struct proc *p; + register struct fchown_args /* { + syscallarg(int) fd; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; +{ + struct file *fp; + int error; + + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + return setfown(p, (struct vnode *)fp->f_data, + SCARG(uap, uid), SCARG(uap, gid)); +} + +static int +getutimes(usrtvp, tsp) + const struct timeval *usrtvp; + struct timespec *tsp; +{ + struct timeval tv[2]; + int error; + + if (usrtvp == NULL) { + microtime(&tv[0]); + TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); + tsp[1] = tsp[0]; + } else { + if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0) + return (error); + TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); + TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); + } + return 0; +} + +static int +setutimes(p, vp, ts, nullflag) + struct proc *p; + struct vnode *vp; + const struct timespec *ts; + int nullflag; +{ + int error; + struct vattr vattr; + + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + VATTR_NULL(&vattr); + vattr.va_atime = ts[0]; + vattr.va_mtime = ts[1]; + if (nullflag) + vattr.va_vaflags |= VA_UTIMES_NULL; + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + VOP_UNLOCK(vp, 0, p); + return error; +} + +/* + * Set the access and modification times of a file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct utimes_args { + char *path; + struct timeval *tptr; +}; +#endif +/* ARGSUSED */ +int +utimes(p, uap) + struct proc *p; + register struct utimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap; +{ + struct timespec ts[2]; + struct timeval *usrtvp; + int error; + struct nameidata nd; + + usrtvp = SCARG(uap, tptr); + if ((error = getutimes(usrtvp, ts)) != 0) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL); + vrele(nd.ni_vp); + return (error); +} + +/* + * Set the access and modification times of a file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct lutimes_args { + char *path; + struct timeval *tptr; +}; +#endif +/* ARGSUSED */ +int +lutimes(p, uap) + struct proc *p; + register struct lutimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap; +{ + struct timespec ts[2]; + struct timeval *usrtvp; + int error; + struct nameidata nd; + + usrtvp = SCARG(uap, tptr); + if ((error = getutimes(usrtvp, ts)) != 0) + return (error); + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL); + vrele(nd.ni_vp); + return (error); +} + +/* + * Set the access and modification times of a file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct futimes_args { + int fd; + struct timeval *tptr; +}; +#endif +/* ARGSUSED */ +int +futimes(p, uap) + struct proc *p; + register struct futimes_args /* { + syscallarg(int ) fd; + syscallarg(struct timeval *) tptr; + } */ *uap; +{ + struct timespec ts[2]; + struct file *fp; + struct timeval *usrtvp; + int error; + + usrtvp = SCARG(uap, tptr); + if ((error = getutimes(usrtvp, ts)) != 0) + return (error); + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + return setutimes(p, (struct vnode *)fp->f_data, ts, usrtvp == NULL); +} + +/* + * Truncate a file given its path name. + */ +#ifndef _SYS_SYSPROTO_H_ +struct truncate_args { + char *path; + int pad; + off_t length; +}; +#endif +/* ARGSUSED */ +int +truncate(p, uap) + struct proc *p; + register struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + if (uap->length < 0) + return(EINVAL); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + NDFREE(&nd, NDF_ONLY_PNBUF); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_type == VDIR) + error = EISDIR; + else if ((error = vn_writechk(vp)) == 0 && + (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { + VATTR_NULL(&vattr); + vattr.va_size = SCARG(uap, length); + error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + } + vput(vp); + return (error); +} + +/* + * Truncate a file given a file descriptor. + */ +#ifndef _SYS_SYSPROTO_H_ +struct ftruncate_args { + int fd; + int pad; + off_t length; +}; +#endif +/* ARGSUSED */ +int +ftruncate(p, uap) + struct proc *p; + register struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; +{ + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (uap->length < 0) + return(EINVAL); + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + if ((fp->f_flag & FWRITE) == 0) + return (EINVAL); + vp = (struct vnode *)fp->f_data; + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (vp->v_type == VDIR) + error = EISDIR; + else if ((error = vn_writechk(vp)) == 0) { + VATTR_NULL(&vattr); + vattr.va_size = SCARG(uap, length); + error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); + } + VOP_UNLOCK(vp, 0, p); + return (error); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Truncate a file given its path name. + */ +#ifndef _SYS_SYSPROTO_H_ +struct otruncate_args { + char *path; + long length; +}; +#endif +/* ARGSUSED */ +int +otruncate(p, uap) + struct proc *p; + register struct otruncate_args /* { + syscallarg(char *) path; + syscallarg(long) length; + } */ *uap; +{ + struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, path) = SCARG(uap, path); + SCARG(&nuap, length) = SCARG(uap, length); + return (truncate(p, &nuap)); +} + +/* + * Truncate a file given a file descriptor. + */ +#ifndef _SYS_SYSPROTO_H_ +struct oftruncate_args { + int fd; + long length; +}; +#endif +/* ARGSUSED */ +int +oftruncate(p, uap) + struct proc *p; + register struct oftruncate_args /* { + syscallarg(int) fd; + syscallarg(long) length; + } */ *uap; +{ + struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, length) = SCARG(uap, length); + return (ftruncate(p, &nuap)); +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* + * Sync an open file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fsync_args { + int fd; +}; +#endif +/* ARGSUSED */ +int +fsync(p, uap) + struct proc *p; + struct fsync_args /* { + syscallarg(int) fd; + } */ *uap; +{ + register struct vnode *vp; + struct file *fp; + vm_object_t obj; + int error; + + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (VOP_GETVOBJECT(vp, &obj) == 0) + vm_object_page_clean(obj, 0, 0, 0); + shd_write_entry (vp, "fsync"); + if ((error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p)) == 0 && + vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) && + bioops.io_fsync) + error = (*bioops.io_fsync)(vp); + shd_write_exit(vp, "fsync"); + VOP_UNLOCK(vp, 0, p); + return (error); +} + +/* + * Rename files. Source and destination must either both be directories, + * or both not be directories. If target is a directory, it must be empty. + */ +#ifndef _SYS_SYSPROTO_H_ +struct rename_args { + char *from; + char *to; +}; +#endif +/* ARGSUSED */ +int +rename(p, uap) + struct proc *p; + register struct rename_args /* { + syscallarg(char *) from; + syscallarg(char *) to; + } */ *uap; +{ + register struct vnode *tvp, *fvp, *tdvp; + struct nameidata fromnd, tond; + int error; + + bwillwrite(); + NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, + SCARG(uap, from), p); + if ((error = namei(&fromnd)) != 0) + return (error); + fvp = fromnd.ni_vp; + NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, + UIO_USERSPACE, SCARG(uap, to), p); + if (fromnd.ni_vp->v_type == VDIR) + tond.ni_cnd.cn_flags |= WILLBEDIR; + if ((error = namei(&tond)) != 0) { + /* Translate error code for rename("dir1", "dir2/."). */ + if (error == EISDIR && fvp->v_type == VDIR) + error = EINVAL; + NDFREE(&fromnd, NDF_ONLY_PNBUF); + vrele(fromnd.ni_dvp); + vrele(fvp); + goto out1; + } + tdvp = tond.ni_dvp; + tvp = tond.ni_vp; + if (tvp != NULL) { + if (fvp->v_type == VDIR && tvp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { + error = EISDIR; + goto out; + } + } + if (fvp == tdvp) + error = EINVAL; + /* + * If source is the same as the destination (that is the + * same inode number with the same name in the same directory), + * then there is nothing to do. + */ + if (fvp == tvp && fromnd.ni_dvp == tdvp && + fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && + !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, + fromnd.ni_cnd.cn_namelen)) + error = -1; +out: + if (!error) { + VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); + if (fromnd.ni_dvp != tdvp) { + VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + } + if (tvp) { + VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); + } + shd_write_entry (fromnd.ni_dvp, "rename"); + shd_write_entry (tond.ni_dvp, "rename"); + error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, + tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); + shd_write_exit (fromnd.ni_dvp, "rename"); + shd_write_exit (tond.ni_dvp, "rename"); + NDFREE(&fromnd, NDF_ONLY_PNBUF); + NDFREE(&tond, NDF_ONLY_PNBUF); + } else { + NDFREE(&fromnd, NDF_ONLY_PNBUF); + NDFREE(&tond, NDF_ONLY_PNBUF); + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fromnd.ni_dvp); + vrele(fvp); + } + vrele(tond.ni_startdir); + ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); + ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); + ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); + ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); +out1: + if (fromnd.ni_startdir) + vrele(fromnd.ni_startdir); + if (error == -1) + return (0); + return (error); +} + +/* + * Make a directory file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct mkdir_args { + char *path; + int mode; +}; +#endif +/* ARGSUSED */ +int +mkdir(p, uap) + struct proc *p; + register struct mkdir_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + bwillwrite(); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + nd.ni_cnd.cn_flags |= WILLBEDIR; + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + if (vp != NULL) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(vp); + return (EEXIST); + } + VATTR_NULL(&vattr); + vattr.va_type = VDIR; + vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + shd_write_entry (nd.ni_dvp, "mkdir"); + error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + shd_write_exit (nd.ni_dvp, "mkdir"); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if (!error) + vput(nd.ni_vp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); + return (error); +} + +/* + * Remove a directory file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct rmdir_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +rmdir(p, uap) + struct proc *p; + struct rmdir_args /* { + syscallarg(char *) path; + } */ *uap; +{ + register struct vnode *vp; + int error; + struct nameidata nd; + + bwillwrite(); + NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + if (vp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + /* + * No rmdir "." please. + */ + if (nd.ni_dvp == vp) { + error = EINVAL; + goto out; + } + /* + * The root of a mounted filesystem cannot be deleted. + */ + if (vp->v_flag & VROOT) + error = EBUSY; + else { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + shd_write_entry (nd.ni_dvp, "rmdir"); + error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + shd_write_exit (nd.ni_dvp, "rmdir"); + } +out: + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); + return (error); +} + +#ifdef COMPAT_43 +/* + * Read a block of directory entries in a file system independent format. + */ +#ifndef _SYS_SYSPROTO_H_ +struct ogetdirentries_args { + int fd; + char *buf; + u_int count; + long *basep; +}; +#endif +int +ogetdirentries(p, uap) + struct proc *p; + register struct ogetdirentries_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + syscallarg(long *) basep; + } */ *uap; +{ + struct vnode *vp; + struct file *fp; + struct uio auio, kuio; + struct iovec aiov, kiov; + struct dirent *dp, *edp; + caddr_t dirbuf; + int error, eofflag, readcnt; + long loff; + + /* XXX arbitrary sanity limit on `count'. */ + if (SCARG(uap, count) > 64 * 1024) + return (EINVAL); + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + if ((fp->f_flag & FREAD) == 0) + return (EBADF); + vp = (struct vnode *)fp->f_data; +unionread: + if (vp->v_type != VDIR) + return (EINVAL); + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_resid = SCARG(uap, count); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + loff = auio.uio_offset = fp->f_offset; +# if (BYTE_ORDER != LITTLE_ENDIAN) + if (vp->v_mount->mnt_maxsymlinklen <= 0) { + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, + NULL, NULL); + fp->f_offset = auio.uio_offset; + } else +# endif + { + kuio = auio; + kuio.uio_iov = &kiov; + kuio.uio_segflg = UIO_SYSSPACE; + kiov.iov_len = SCARG(uap, count); + MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); + kiov.iov_base = dirbuf; + error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, + NULL, NULL); + fp->f_offset = kuio.uio_offset; + if (error == 0) { + readcnt = SCARG(uap, count) - kuio.uio_resid; + edp = (struct dirent *)&dirbuf[readcnt]; + for (dp = (struct dirent *)dirbuf; dp < edp; ) { +# if (BYTE_ORDER == LITTLE_ENDIAN) + /* + * The expected low byte of + * dp->d_namlen is our dp->d_type. + * The high MBZ byte of dp->d_namlen + * is our dp->d_namlen. + */ + dp->d_type = dp->d_namlen; + dp->d_namlen = 0; +# else + /* + * The dp->d_type is the high byte + * of the expected dp->d_namlen, + * so must be zero'ed. + */ + dp->d_type = 0; +# endif + if (dp->d_reclen > 0) { + dp = (struct dirent *) + ((char *)dp + dp->d_reclen); + } else { + error = EIO; + break; + } + } + if (dp >= edp) + error = uiomove(dirbuf, readcnt, &auio); + } + FREE(dirbuf, M_TEMP); + } + VOP_UNLOCK(vp, 0, p); + if (error) + return (error); + if (SCARG(uap, count) == auio.uio_resid) { + if (union_dircheckp) { + error = union_dircheckp(p, &vp, fp); + if (error == -1) + goto unionread; + if (error) + return (error); + } + if ((vp->v_flag & VROOT) && + (vp->v_mount->mnt_flag & MNT_UNION)) { + struct vnode *tvp = vp; + vp = vp->v_mount->mnt_vnodecovered; + VREF(vp); + fp->f_data = (caddr_t) vp; + fp->f_offset = 0; + vrele(tvp); + goto unionread; + } + } + error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), + sizeof(long)); + p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; + return (error); +} +#endif /* COMPAT_43 */ + +/* + * Read a block of directory entries in a file system independent format. + */ +#ifndef _SYS_SYSPROTO_H_ +struct getdirentries_args { + int fd; + char *buf; + u_int count; + long *basep; +}; +#endif +int +getdirentries(p, uap) + struct proc *p; + register struct getdirentries_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + syscallarg(long *) basep; + } */ *uap; +{ + struct vnode *vp; + struct file *fp; + struct uio auio; + struct iovec aiov; + long loff; + int error, eofflag; + + if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + if ((fp->f_flag & FREAD) == 0) + return (EBADF); + vp = (struct vnode *)fp->f_data; +unionread: + if (vp->v_type != VDIR) + return (EINVAL); + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_resid = SCARG(uap, count); + /* vn_lock(vp, LK_SHARED | LK_RETRY, p); */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + loff = auio.uio_offset = fp->f_offset; + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); + fp->f_offset = auio.uio_offset; + VOP_UNLOCK(vp, 0, p); + if (error) + return (error); + if (SCARG(uap, count) == auio.uio_resid) { + if (union_dircheckp) { + error = union_dircheckp(p, &vp, fp); + if (error == -1) + goto unionread; + if (error) + return (error); + } + if ((vp->v_flag & VROOT) && + (vp->v_mount->mnt_flag & MNT_UNION)) { + struct vnode *tvp = vp; + vp = vp->v_mount->mnt_vnodecovered; + VREF(vp); + fp->f_data = (caddr_t) vp; + fp->f_offset = 0; + vrele(tvp); + goto unionread; + } + } + if (SCARG(uap, basep) != NULL) { + error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), + sizeof(long)); + } + p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; + return (error); +} +#ifndef _SYS_SYSPROTO_H_ +struct getdents_args { + int fd; + char *buf; + size_t count; +}; +#endif +int +getdents(p, uap) + struct proc *p; + register struct getdents_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + } */ *uap; +{ + struct getdirentries_args ap; + ap.fd = uap->fd; + ap.buf = uap->buf; + ap.count = uap->count; + ap.basep = NULL; + return getdirentries(p, &ap); +} + +/* + * Set the mode mask for creation of filesystem nodes. + * + * MP SAFE + */ +#ifndef _SYS_SYSPROTO_H_ +struct umask_args { + int newmask; +}; +#endif +int +umask(p, uap) + struct proc *p; + struct umask_args /* { + syscallarg(int) newmask; + } */ *uap; +{ + register struct filedesc *fdp; + + fdp = p->p_fd; + p->p_retval[0] = fdp->fd_cmask; + fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; + return (0); +} + +/* + * Void all references to file by ripping underlying filesystem + * away from vnode. + */ +#ifndef _SYS_SYSPROTO_H_ +struct revoke_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +revoke(p, uap) + struct proc *p; + register struct revoke_args /* { + syscallarg(char *) path; + } */ *uap; +{ + register struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + NDFREE(&nd, NDF_ONLY_PNBUF); + if (vp->v_type != VCHR && vp->v_type != VBLK) { + error = EINVAL; + goto out; + } + if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0) + goto out; + if (p->p_ucred->cr_uid != vattr.va_uid && + (error = suser_xxx(0, p, PRISON_ROOT))) + goto out; + if (vcount(vp) > 1) + VOP_REVOKE(vp, REVOKEALL); +out: + vrele(vp); + return (error); +} + +/* + * Convert a user file descriptor to a kernel file entry. + */ +int +getvnode(fdp, fd, fpp) + struct filedesc *fdp; + int fd; + struct file **fpp; +{ + struct file *fp; + + if ((u_int)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) + return (EINVAL); + *fpp = fp; + return (0); +} +/* + * Get (NFS) file handle + */ +#ifndef _SYS_SYSPROTO_H_ +struct getfh_args { + char *fname; + fhandle_t *fhp; +}; +#endif +int +getfh(p, uap) + struct proc *p; + register struct getfh_args *uap; +{ + struct nameidata nd; + fhandle_t fh; + register struct vnode *vp; + int error; + + /* + * Must be super user + */ + error = suser(p); + if (error) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p); + error = namei(&nd); + if (error) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + bzero(&fh, sizeof(fh)); + fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(vp, &fh.fh_fid); + vput(vp); + if (error) + return (error); + error = copyout(&fh, uap->fhp, sizeof (fh)); + return (error); +} + +/* + * syscall for the rpc.lockd to use to translate a NFS file handle into + * an open descriptor. + * + * warning: do not remove the suser() call or this becomes one giant + * security hole. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fhopen_args { + const struct fhandle *u_fhp; + int flags; +}; +#endif +int +fhopen(p, uap) + struct proc *p; + struct fhopen_args /* { + syscallarg(const struct fhandle *) u_fhp; + syscallarg(int) flags; + } */ *uap; +{ + struct mount *mp; + struct vnode *vp; + struct fhandle fhp; + struct vattr vat; + struct vattr *vap = &vat; + struct flock lf; + struct file *fp; + register struct filedesc *fdp = p->p_fd; + int fmode, mode, error, type; + struct file *nfp; + int indx; + + /* + * Must be super user + */ + error = suser(p); + if (error) + return (error); + + fmode = FFLAGS(SCARG(uap, flags)); + /* why not allow a non-read/write open for our lockd? */ + if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) + return (EINVAL); + error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp)); + if (error) + return(error); + /* find the mount point */ + mp = vfs_getvfs(&fhp.fh_fsid); + if (mp == NULL) + return (ESTALE); + /* now give me my vnode, it gets returned to me locked */ + error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); + if (error) + return (error); + /* + * from now on we have to make sure not + * to forget about the vnode + * any error that causes an abort must vput(vp) + * just set error = err and 'goto bad;'. + */ + + /* + * from vn_open + */ + if (vp->v_type == VLNK) { + error = EMLINK; + goto bad; + } + if (vp->v_type == VSOCK) { + error = EOPNOTSUPP; + goto bad; + } + mode = 0; + if (fmode & (FWRITE | O_TRUNC)) { + if (vp->v_type == VDIR) { + error = EISDIR; + goto bad; + } + error = vn_writechk(vp); + if (error) + goto bad; + mode |= VWRITE; + } + if (fmode & FREAD) + mode |= VREAD; + if (mode) { + error = VOP_ACCESS(vp, mode, p->p_ucred, p); + if (error) + goto bad; + } + if (fmode & O_TRUNC) { + VOP_UNLOCK(vp, 0, p); /* XXX */ + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ + VATTR_NULL(vap); + vap->va_size = 0; + error = VOP_SETATTR(vp, vap, p->p_ucred, p); + if (error) + goto bad; + } + error = VOP_OPEN(vp, fmode, p->p_ucred, p); + if (error) + goto bad; + /* + * Make sure that a VM object is created for VMIO support. + */ + if (vn_canvmio(vp) == TRUE) { + if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0) + goto bad; + } + if (fmode & FWRITE) + vp->v_writecount++; + + /* + * end of vn_open code + */ + + if ((error = falloc(p, &nfp, &indx)) != 0) { + if (fmode & FWRITE) + vp->v_writecount--; + goto bad; + } + fp = nfp; + + /* + * hold an extra reference to avoid having fp ripped out + * from under us while we block in the lock op. + */ + fhold(fp); + nfp->f_data = (caddr_t)vp; + nfp->f_flag = fmode & FMASK; + nfp->f_ops = &vnops; + nfp->f_type = DTYPE_VNODE; + if (fmode & (O_EXLOCK | O_SHLOCK)) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + if (fmode & O_EXLOCK) + lf.l_type = F_WRLCK; + else + lf.l_type = F_RDLCK; + type = F_FLOCK; + if ((fmode & FNONBLOCK) == 0) + type |= F_WAIT; + VOP_UNLOCK(vp, 0, p); + if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { + /* + * lock request failed. Normally close the descriptor + * but handle the case where someone might have dup()d + * or close()d it when we weren't looking. + */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + + /* + * release our private reference. + */ + fdrop(fp, p); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + fp->f_flag |= FHASLOCK; + } + if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0)) + vfs_object_create(vp, p, p->p_ucred); + + VOP_UNLOCK(vp, 0, p); + fdrop(fp, p); + p->p_retval[0] = indx; + return (0); + +bad: + vput(vp); + return (error); +} + +#ifndef _SYS_SYSPROTO_H_ +struct fhstat_args { + struct fhandle *u_fhp; + struct stat *sb; +}; +#endif +int +fhstat(p, uap) + struct proc *p; + register struct fhstat_args /* { + syscallarg(struct fhandle *) u_fhp; + syscallarg(struct stat *) sb; + } */ *uap; +{ + struct stat sb; + fhandle_t fh; + struct mount *mp; + struct vnode *vp; + int error; + + /* + * Must be super user + */ + error = suser(p); + if (error) + return (error); + + error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t)); + if (error) + return (error); + + if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) + return (ESTALE); + if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) + return (error); + error = vn_stat(vp, &sb, p); + vput(vp); + if (error) + return (error); + error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); + return (error); +} + +#ifndef _SYS_SYSPROTO_H_ +struct fhstatfs_args { + struct fhandle *u_fhp; + struct statfs *buf; +}; +#endif +int +fhstatfs(p, uap) + struct proc *p; + struct fhstatfs_args /* { + syscallarg(struct fhandle) *u_fhp; + syscallarg(struct statfs) *buf; + } */ *uap; +{ + struct statfs *sp; + struct mount *mp; + struct vnode *vp; + struct statfs sb; + fhandle_t fh; + int error; + + /* + * Must be super user + */ + if ((error = suser(p))) + return (error); + + if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0) + return (error); + + if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) + return (ESTALE); + if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) + return (error); + mp = vp->v_mount; + sp = &mp->mnt_stat; + vput(vp); + if ((error = VFS_STATFS(mp, sp, p)) != 0) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (suser_xxx(p->p_ucred, 0, 0)) { + bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); + sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; + sp = &sb; + } + return (copyout(sp, SCARG(uap, buf), sizeof(*sp))); +} + +/* + * Syscall to push extended attribute configuration information into the + * VFS. Accepts a path, which it converts to a mountpoint, as well as + * a command (int cmd), and attribute name and misc data. For now, the + * attribute name is left in userspace for consumption by the VFS_op. + * It will probably be changed to be copied into sysspace by the + * syscall in the future, once issues with various consumers of the + * attribute code have raised their hands. + * + * Currently this is used only by UFS Extended Attributes. + */ +int +extattrctl(p, uap) + struct proc *p; + struct extattrctl_args *uap; +{ + struct nameidata nd; + struct mount *mp; + int error; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + mp = nd.ni_vp->v_mount; + NDFREE(&nd, 0); + return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname), + SCARG(uap, arg), p)); +} + +/* + * Syscall to set a named extended attribute on a file or directory. + * Accepts attribute name, and a uio structure pointing to the data to set. + * The uio is consumed in the style of writev(). The real work happens + * in VOP_SETEXTATTR(). + */ +int +extattr_set_file(p, uap) + struct proc *p; + struct extattr_set_file_args *uap; +{ + struct nameidata nd; + struct uio auio; + struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; + char attrname[EXTATTR_MAXNAMELEN]; + u_int iovlen, cnt; + int error, i; + + error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); + if (error) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return(error); + iovlen = uap->iovcnt * sizeof(struct iovec); + if (uap->iovcnt > UIO_SMALLIOV) { + if (uap->iovcnt > UIO_MAXIOV) { + error = EINVAL; + goto done; + } + MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); + needfree = iov; + } else + iov = aiov; + auio.uio_iov = iov; + auio.uio_iovcnt = uap->iovcnt; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_offset = 0; + if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) + goto done; + auio.uio_resid = 0; + for (i = 0; i < uap->iovcnt; i++) { + if (iov->iov_len > INT_MAX - auio.uio_resid) { + error = EINVAL; + goto done; + } + auio.uio_resid += iov->iov_len; + iov++; + } + cnt = auio.uio_resid; + shd_write_entry (nd.ni_vp, "extattr_set_file"); + error = VOP_SETEXTATTR(nd.ni_vp, attrname, &auio, p->p_cred->pc_ucred, + p); + shd_write_exit (nd.ni_vp, "extattr_set_file"); + cnt -= auio.uio_resid; + p->p_retval[0] = cnt; +done: + if (needfree) + FREE(needfree, M_IOV); + NDFREE(&nd, 0); + return (error); +} + +/* + * Syscall to get a named extended attribute on a file or directory. + * Accepts attribute name, and a uio structure pointing to a buffer for the + * data. The uio is consumed in the style of readv(). The real work + * happens in VOP_GETEXTATTR(); + */ +int +extattr_get_file(p, uap) + struct proc *p; + struct extattr_get_file_args *uap; +{ + struct nameidata nd; + struct uio auio; + struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; + char attrname[EXTATTR_MAXNAMELEN]; + u_int iovlen, cnt; + int error, i; + + error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); + if (error) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return (error); + iovlen = uap->iovcnt * sizeof (struct iovec); + if (uap->iovcnt > UIO_SMALLIOV) { + if (uap->iovcnt > UIO_MAXIOV) { + NDFREE(&nd, 0); + return (EINVAL); + } + MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); + needfree = iov; + } else { + iov = aiov; + needfree = NULL; + } + auio.uio_iov = iov; + auio.uio_iovcnt = uap->iovcnt; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_procp = p; + auio.uio_offset = 0; + if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) + goto done; + auio.uio_resid = 0; + for (i = 0; i < uap->iovcnt; i++) { + if (iov->iov_len > INT_MAX - auio.uio_resid) { + error = EINVAL; + goto done; + } + auio.uio_resid += iov->iov_len; + iov++; + } + cnt = auio.uio_resid; + error = VOP_GETEXTATTR(nd.ni_vp, attrname, &auio, p->p_cred->pc_ucred, + p); + cnt -= auio.uio_resid; + p->p_retval[0] = cnt; +done: + if (needfree) + FREE(needfree, M_IOV); + NDFREE(&nd, 0); + return(error); +} + +/* + * Syscall to delete a named extended attribute from a file or directory. + * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). + */ +int +extattr_delete_file(p, uap) + struct proc *p; + struct extattr_delete_file_args *uap; +{ + struct nameidata nd; + char attrname[EXTATTR_MAXNAMELEN]; + int error; + + error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); + if (error) + return(error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if ((error = namei(&nd)) != 0) + return(error); + error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred, + p); + NDFREE(&nd, 0); + return(error); +}