| /*** |
| This file is part of systemd. |
| |
| Copyright 2014 Lennart Poettering |
| |
| systemd is free software; you can redistribute it and/or modify it |
| under the terms of the GNU Lesser General Public License as published by |
| the Free Software Foundation; either version 2.1 of the License, or |
| (at your option) any later version. |
| |
| systemd is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public License |
| along with systemd; If not, see <http://www.gnu.org/licenses/>. |
| ***/ |
| |
| #include <errno.h> |
| #include <seccomp.h> |
| #include <stddef.h> |
| #include <sys/prctl.h> |
| #include <linux/seccomp.h> |
| |
| #include "macro.h" |
| #include "seccomp-util.h" |
| #include "string-util.h" |
| #include "util.h" |
| |
| const char* seccomp_arch_to_string(uint32_t c) { |
| /* Maintain order used in <seccomp.h>. |
| * |
| * Names used here should be the same as those used for ConditionArchitecture=, |
| * except for "subarchitectures" like x32. */ |
| |
| switch(c) { |
| case SCMP_ARCH_NATIVE: |
| return "native"; |
| case SCMP_ARCH_X86: |
| return "x86"; |
| case SCMP_ARCH_X86_64: |
| return "x86-64"; |
| case SCMP_ARCH_X32: |
| return "x32"; |
| case SCMP_ARCH_ARM: |
| return "arm"; |
| case SCMP_ARCH_AARCH64: |
| return "arm64"; |
| case SCMP_ARCH_MIPS: |
| return "mips"; |
| case SCMP_ARCH_MIPS64: |
| return "mips64"; |
| case SCMP_ARCH_MIPS64N32: |
| return "mips64-n32"; |
| case SCMP_ARCH_MIPSEL: |
| return "mips-le"; |
| case SCMP_ARCH_MIPSEL64: |
| return "mips64-le"; |
| case SCMP_ARCH_MIPSEL64N32: |
| return "mips64-le-n32"; |
| case SCMP_ARCH_PPC: |
| return "ppc"; |
| case SCMP_ARCH_PPC64: |
| return "ppc64"; |
| case SCMP_ARCH_PPC64LE: |
| return "ppc64-le"; |
| case SCMP_ARCH_S390: |
| return "s390"; |
| case SCMP_ARCH_S390X: |
| return "s390x"; |
| default: |
| return NULL; |
| } |
| } |
| |
| int seccomp_arch_from_string(const char *n, uint32_t *ret) { |
| if (!n) |
| return -EINVAL; |
| |
| assert(ret); |
| |
| if (streq(n, "native")) |
| *ret = SCMP_ARCH_NATIVE; |
| else if (streq(n, "x86")) |
| *ret = SCMP_ARCH_X86; |
| else if (streq(n, "x86-64")) |
| *ret = SCMP_ARCH_X86_64; |
| else if (streq(n, "x32")) |
| *ret = SCMP_ARCH_X32; |
| else if (streq(n, "arm")) |
| *ret = SCMP_ARCH_ARM; |
| else if (streq(n, "arm64")) |
| *ret = SCMP_ARCH_AARCH64; |
| else if (streq(n, "mips")) |
| *ret = SCMP_ARCH_MIPS; |
| else if (streq(n, "mips64")) |
| *ret = SCMP_ARCH_MIPS64; |
| else if (streq(n, "mips64-n32")) |
| *ret = SCMP_ARCH_MIPS64N32; |
| else if (streq(n, "mips-le")) |
| *ret = SCMP_ARCH_MIPSEL; |
| else if (streq(n, "mips64-le")) |
| *ret = SCMP_ARCH_MIPSEL64; |
| else if (streq(n, "mips64-le-n32")) |
| *ret = SCMP_ARCH_MIPSEL64N32; |
| else if (streq(n, "ppc")) |
| *ret = SCMP_ARCH_PPC; |
| else if (streq(n, "ppc64")) |
| *ret = SCMP_ARCH_PPC64; |
| else if (streq(n, "ppc64-le")) |
| *ret = SCMP_ARCH_PPC64LE; |
| else if (streq(n, "s390")) |
| *ret = SCMP_ARCH_S390; |
| else if (streq(n, "s390x")) |
| *ret = SCMP_ARCH_S390X; |
| else |
| return -EINVAL; |
| |
| return 0; |
| } |
| |
| int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action) { |
| scmp_filter_ctx seccomp; |
| int r; |
| |
| /* Much like seccomp_init(), but tries to be a bit more conservative in its defaults: all secondary archs are |
| * added by default, and NNP is turned off. */ |
| |
| seccomp = seccomp_init(default_action); |
| if (!seccomp) |
| return -ENOMEM; |
| |
| r = seccomp_add_secondary_archs(seccomp); |
| if (r < 0) |
| goto finish; |
| |
| r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); |
| if (r < 0) |
| goto finish; |
| |
| *ret = seccomp; |
| return 0; |
| |
| finish: |
| seccomp_release(seccomp); |
| return r; |
| } |
| |
| int seccomp_add_secondary_archs(scmp_filter_ctx ctx) { |
| |
| /* Add in all possible secondary archs we are aware of that |
| * this kernel might support. */ |
| |
| static const int seccomp_arches[] = { |
| #if defined(__i386__) || defined(__x86_64__) |
| SCMP_ARCH_X86, |
| SCMP_ARCH_X86_64, |
| SCMP_ARCH_X32, |
| |
| #elif defined(__arm__) || defined(__aarch64__) |
| SCMP_ARCH_ARM, |
| SCMP_ARCH_AARCH64, |
| |
| #elif defined(__arm__) || defined(__aarch64__) |
| SCMP_ARCH_ARM, |
| SCMP_ARCH_AARCH64, |
| |
| #elif defined(__mips__) || defined(__mips64__) |
| SCMP_ARCH_MIPS, |
| SCMP_ARCH_MIPS64, |
| SCMP_ARCH_MIPS64N32, |
| SCMP_ARCH_MIPSEL, |
| SCMP_ARCH_MIPSEL64, |
| SCMP_ARCH_MIPSEL64N32, |
| |
| #elif defined(__powerpc__) || defined(__powerpc64__) |
| SCMP_ARCH_PPC, |
| SCMP_ARCH_PPC64, |
| SCMP_ARCH_PPC64LE, |
| |
| #elif defined(__s390__) || defined(__s390x__) |
| SCMP_ARCH_S390, |
| SCMP_ARCH_S390X, |
| #endif |
| }; |
| |
| unsigned i; |
| int r; |
| |
| for (i = 0; i < ELEMENTSOF(seccomp_arches); i++) { |
| r = seccomp_arch_add(ctx, seccomp_arches[i]); |
| if (r < 0 && r != -EEXIST) |
| return r; |
| } |
| |
| return 0; |
| } |
| |
| static bool is_basic_seccomp_available(void) { |
| int r; |
| r = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); |
| return r >= 0; |
| } |
| |
| static bool is_seccomp_filter_available(void) { |
| int r; |
| r = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0); |
| return r < 0 && errno == EFAULT; |
| } |
| |
| bool is_seccomp_available(void) { |
| static int cached_enabled = -1; |
| if (cached_enabled < 0) |
| cached_enabled = is_basic_seccomp_available() && is_seccomp_filter_available(); |
| return cached_enabled; |
| } |
| |
| const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { |
| [SYSCALL_FILTER_SET_BASIC_IO] = { |
| /* Basic IO */ |
| .name = "@basic-io", |
| .value = |
| "close\0" |
| "dup2\0" |
| "dup3\0" |
| "dup\0" |
| "lseek\0" |
| "pread64\0" |
| "preadv\0" |
| "pwrite64\0" |
| "pwritev\0" |
| "read\0" |
| "readv\0" |
| "write\0" |
| "writev\0" |
| }, |
| [SYSCALL_FILTER_SET_CLOCK] = { |
| /* Clock */ |
| .name = "@clock", |
| .value = |
| "adjtimex\0" |
| "clock_adjtime\0" |
| "clock_settime\0" |
| "settimeofday\0" |
| "stime\0" |
| }, |
| [SYSCALL_FILTER_SET_CPU_EMULATION] = { |
| /* CPU emulation calls */ |
| .name = "@cpu-emulation", |
| .value = |
| "modify_ldt\0" |
| "subpage_prot\0" |
| "switch_endian\0" |
| "vm86\0" |
| "vm86old\0" |
| }, |
| [SYSCALL_FILTER_SET_DEBUG] = { |
| /* Debugging/Performance Monitoring/Tracing */ |
| .name = "@debug", |
| .value = |
| "lookup_dcookie\0" |
| "perf_event_open\0" |
| "process_vm_readv\0" |
| "process_vm_writev\0" |
| "ptrace\0" |
| "rtas\0" |
| #ifdef __NR_s390_runtime_instr |
| "s390_runtime_instr\0" |
| #endif |
| "sys_debug_setcontext\0" |
| }, |
| [SYSCALL_FILTER_SET_DEFAULT] = { |
| /* Default list: the most basic of operations */ |
| .name = "@default", |
| .value = |
| "clock_getres\0" |
| "clock_gettime\0" |
| "clock_nanosleep\0" |
| "execve\0" |
| "exit\0" |
| "exit_group\0" |
| "getrlimit\0" /* make sure processes can query stack size and such */ |
| "gettimeofday\0" |
| "nanosleep\0" |
| "pause\0" |
| "rt_sigreturn\0" |
| "sigreturn\0" |
| "time\0" |
| }, |
| [SYSCALL_FILTER_SET_IO_EVENT] = { |
| /* Event loop use */ |
| .name = "@io-event", |
| .value = |
| "_newselect\0" |
| "epoll_create1\0" |
| "epoll_create\0" |
| "epoll_ctl\0" |
| "epoll_ctl_old\0" |
| "epoll_pwait\0" |
| "epoll_wait\0" |
| "epoll_wait_old\0" |
| "eventfd2\0" |
| "eventfd\0" |
| "poll\0" |
| "ppoll\0" |
| "pselect6\0" |
| "select\0" |
| }, |
| [SYSCALL_FILTER_SET_IPC] = { |
| /* Message queues, SYSV IPC or other IPC */ |
| .name = "@ipc", |
| .value = "ipc\0" |
| "memfd_create\0" |
| "mq_getsetattr\0" |
| "mq_notify\0" |
| "mq_open\0" |
| "mq_timedreceive\0" |
| "mq_timedsend\0" |
| "mq_unlink\0" |
| "msgctl\0" |
| "msgget\0" |
| "msgrcv\0" |
| "msgsnd\0" |
| "pipe2\0" |
| "pipe\0" |
| "process_vm_readv\0" |
| "process_vm_writev\0" |
| "semctl\0" |
| "semget\0" |
| "semop\0" |
| "semtimedop\0" |
| "shmat\0" |
| "shmctl\0" |
| "shmdt\0" |
| "shmget\0" |
| }, |
| [SYSCALL_FILTER_SET_KEYRING] = { |
| /* Keyring */ |
| .name = "@keyring", |
| .value = |
| "add_key\0" |
| "keyctl\0" |
| "request_key\0" |
| }, |
| [SYSCALL_FILTER_SET_MODULE] = { |
| /* Kernel module control */ |
| .name = "@module", |
| .value = |
| "delete_module\0" |
| "finit_module\0" |
| "init_module\0" |
| }, |
| [SYSCALL_FILTER_SET_MOUNT] = { |
| /* Mounting */ |
| .name = "@mount", |
| .value = |
| "chroot\0" |
| "mount\0" |
| "pivot_root\0" |
| "umount2\0" |
| "umount\0" |
| }, |
| [SYSCALL_FILTER_SET_NETWORK_IO] = { |
| /* Network or Unix socket IO, should not be needed if not network facing */ |
| .name = "@network-io", |
| .value = |
| "accept4\0" |
| "accept\0" |
| "bind\0" |
| "connect\0" |
| "getpeername\0" |
| "getsockname\0" |
| "getsockopt\0" |
| "listen\0" |
| "recv\0" |
| "recvfrom\0" |
| "recvmmsg\0" |
| "recvmsg\0" |
| "send\0" |
| "sendmmsg\0" |
| "sendmsg\0" |
| "sendto\0" |
| "setsockopt\0" |
| "shutdown\0" |
| "socket\0" |
| "socketcall\0" |
| "socketpair\0" |
| }, |
| [SYSCALL_FILTER_SET_OBSOLETE] = { |
| /* Unusual, obsolete or unimplemented, some unknown even to libseccomp */ |
| .name = "@obsolete", |
| .value = |
| "_sysctl\0" |
| "afs_syscall\0" |
| "break\0" |
| "create_module\0" |
| "ftime\0" |
| "get_kernel_syms\0" |
| "getpmsg\0" |
| "gtty\0" |
| "lock\0" |
| "mpx\0" |
| "prof\0" |
| "profil\0" |
| "putpmsg\0" |
| "query_module\0" |
| "security\0" |
| "sgetmask\0" |
| "ssetmask\0" |
| "stty\0" |
| "sysfs\0" |
| "tuxcall\0" |
| "ulimit\0" |
| "uselib\0" |
| "ustat\0" |
| "vserver\0" |
| }, |
| [SYSCALL_FILTER_SET_PRIVILEGED] = { |
| /* Nice grab-bag of all system calls which need superuser capabilities */ |
| .name = "@privileged", |
| .value = |
| "@clock\0" |
| "@module\0" |
| "@raw-io\0" |
| "acct\0" |
| "bdflush\0" |
| "bpf\0" |
| "capset\0" |
| "chown32\0" |
| "chown\0" |
| "chroot\0" |
| "fchown32\0" |
| "fchown\0" |
| "fchownat\0" |
| "kexec_file_load\0" |
| "kexec_load\0" |
| "lchown32\0" |
| "lchown\0" |
| "nfsservctl\0" |
| "pivot_root\0" |
| "quotactl\0" |
| "reboot\0" |
| "setdomainname\0" |
| "setfsuid32\0" |
| "setfsuid\0" |
| "setgroups32\0" |
| "setgroups\0" |
| "sethostname\0" |
| "setresuid32\0" |
| "setresuid\0" |
| "setreuid32\0" |
| "setreuid\0" |
| "setuid32\0" |
| "setuid\0" |
| "swapoff\0" |
| "swapon\0" |
| "_sysctl\0" |
| "vhangup\0" |
| }, |
| [SYSCALL_FILTER_SET_PROCESS] = { |
| /* Process control, execution, namespaces */ |
| .name = "@process", |
| .value = |
| "arch_prctl\0" |
| "clone\0" |
| "execveat\0" |
| "fork\0" |
| "kill\0" |
| "prctl\0" |
| "setns\0" |
| "tgkill\0" |
| "tkill\0" |
| "unshare\0" |
| "vfork\0" |
| }, |
| [SYSCALL_FILTER_SET_RAW_IO] = { |
| /* Raw I/O ports */ |
| .name = "@raw-io", |
| .value = |
| "ioperm\0" |
| "iopl\0" |
| "pciconfig_iobase\0" |
| "pciconfig_read\0" |
| "pciconfig_write\0" |
| #ifdef __NR_s390_pci_mmio_read |
| "s390_pci_mmio_read\0" |
| #endif |
| #ifdef __NR_s390_pci_mmio_write |
| "s390_pci_mmio_write\0" |
| #endif |
| }, |
| [SYSCALL_FILTER_SET_RESOURCES] = { |
| /* Alter resource settings */ |
| .name = "@resources", |
| .value = |
| "sched_setparam\0" |
| "sched_setscheduler\0" |
| "sched_setaffinity\0" |
| "setpriority\0" |
| "setrlimit\0" |
| "set_mempolicy\0" |
| "migrate_pages\0" |
| "move_pages\0" |
| "mbind\0" |
| "sched_setattr\0" |
| "prlimit64\0" |
| }, |
| }; |
| |
| const SyscallFilterSet *syscall_filter_set_find(const char *name) { |
| unsigned i; |
| |
| if (isempty(name) || name[0] != '@') |
| return NULL; |
| |
| for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) |
| if (streq(syscall_filter_sets[i].name, name)) |
| return syscall_filter_sets + i; |
| |
| return NULL; |
| } |
| |
| int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action) { |
| const char *sys; |
| int r; |
| |
| assert(seccomp); |
| assert(set); |
| |
| NULSTR_FOREACH(sys, set->value) { |
| int id; |
| |
| if (sys[0] == '@') { |
| const SyscallFilterSet *other; |
| |
| other = syscall_filter_set_find(sys); |
| if (!other) |
| return -EINVAL; |
| |
| r = seccomp_add_syscall_filter_set(seccomp, other, action); |
| } else { |
| id = seccomp_syscall_resolve_name(sys); |
| if (id == __NR_SCMP_ERROR) |
| return -EINVAL; |
| |
| r = seccomp_rule_add(seccomp, action, id, 0); |
| } |
| if (r < 0) |
| return r; |
| } |
| |
| return 0; |
| } |
| |
| int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action) { |
| scmp_filter_ctx seccomp; |
| int r; |
| |
| assert(set); |
| |
| /* The one-stop solution: allocate a seccomp object, add a filter to it, and apply it */ |
| |
| r = seccomp_init_conservative(&seccomp, default_action); |
| if (r < 0) |
| return r; |
| |
| r = seccomp_add_syscall_filter_set(seccomp, set, action); |
| if (r < 0) |
| goto finish; |
| |
| r = seccomp_load(seccomp); |
| |
| finish: |
| seccomp_release(seccomp); |
| return r; |
| |
| } |