blob: 0146e13a2d6519c3721089e092dc835c5ea1f7ab [file] [log] [blame] [raw]
/*
* Copyright (C) 2000-2005 SWsoft. All rights reserved.
*
* This file may be distributed under the terms of the Q Public License
* as defined by Trolltech AS of Norway and appearing in the file
* LICENSE.QPL included in the packaging of this file.
*
* This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
* WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <string.h>
#include <sys/ioctl.h>
#include <asm/timex.h>
#include <linux/vzcalluser.h>
#include <sys/personality.h>
#include "vzerror.h"
#include "res.h"
#include "env.h"
#include "dist.h"
#include "exec.h"
#include "logger.h"
#include "util.h"
#include "script.h"
#include "iptables.h"
#include "readelf.h"
/* Upper bound used by the env-create ioctl wrappers below when they
 * retry after an EBUSY failure.
 */
#define ENVRETRY 3
/* Control device opened by vz_open(); the VZCTL_* ioctls are issued on it. */
#define VZCTLDEV "/dev/vzctl"
/* Fallback syscall number for setluid, used only when the system headers
 * do not already define __NR_setluid.  The value depends on the target
 * architecture.
 */
#ifndef __NR_setluid
#ifdef __ia64__
#define __NR_setluid 1506
#elif __x86_64__
#define __NR_setluid 501
#else
#define __NR_setluid 511
#endif
#endif
static int env_stop(vps_handler *h, envid_t veid, char *root, int stop_mode);
/* Invoke the setluid system call directly through syscall().
 *
 * @param uid		id passed to the system call.
 * @return		result of syscall(), converted to int.
 */
static inline int setluid(uid_t uid)
{
	long rc = syscall(__NR_setluid, uid);

	return rc;
}
#ifdef __x86_64__
/* OR the given bits into the personality of the calling process.
 *
 * @param mask		personality bits to add.
 * @return		0 on success, -1 if personality() rejected the value.
 */
static int set_personality(unsigned long mask)
{
	unsigned long per;

	/* personality(0xffffffff) queries the current value without
	 * changing it (see personality(2)).
	 */
	per = personality(0xffffffff) | mask;
	/* per is unsigned long, so the conversion needs the 'l' modifier */
	logger(3, 0, "Set personality %#10.8lx", per);
	if (personality(per) == -1) {
		logger(2, errno, "Unable to set personality PER_LINUX32");
		return -1;
	}
	return 0;
}
/* Add PER_LINUX32 to the personality when /sbin/init is reported as a
 * 32-bit ELF binary by get_arch_from_elf(); otherwise do nothing.
 *
 * @return		0 when no change is needed, else the result of
 *			set_personality().
 */
static int set_personality32()
{
	int init_is_32bit = (get_arch_from_elf("/sbin/init") == elf_32);

	return init_is_32bit ? set_personality(PER_LINUX32) : 0;
}
#endif
/* Issue VZCTL_ENV_CREATE_DATA on the handler's control descriptor.
 *
 * While the ioctl fails with EBUSY it is repeated, sleeping one second
 * before each repetition, for at most ENVRETRY repetitions.
 *
 * @param h		VPS handler (h->vzfd is used).
 * @param data		request passed to the ioctl unchanged.
 * @return		result of the last ioctl() call.
 */
int vz_env_create_data_ioctl(vps_handler *h,
	struct vzctl_env_create_data *data)
{
	int rc;
	int attempt;

	for (attempt = 0; ; attempt++) {
		if (attempt > 0)
			sleep(1);
		rc = ioctl(h->vzfd, VZCTL_ENV_CREATE_DATA, data);
		if (rc >= 0 || errno != EBUSY || attempt >= ENVRETRY)
			break;
	}
#ifdef __x86_64__
	/* Set personality PER_LINUX32 for i386 based VPSes */
	if (rc >= 0)
		set_personality32();
#endif
	return rc;
}
/* Issue VZCTL_ENV_CREATE for the given id and flags.
 *
 * While the ioctl fails with EBUSY it is repeated, sleeping one second
 * before each repetition, for at most ENVRETRY repetitions.
 *
 * @param h		VPS handler (h->vzfd is used).
 * @param veid		VPS id stored in the request.
 * @param flags		flags stored in the request (VE_ENTER, VE_TEST, ...).
 * @return		result of the last ioctl() call.
 */
int vz_env_create_ioctl(vps_handler *h, envid_t veid, int flags)
{
	struct vzctl_env_create req;
	int rc;
	int attempt;

	memset(&req, 0, sizeof(req));
	req.veid = veid;
	req.flags = flags;
	for (attempt = 0; ; attempt++) {
		if (attempt > 0)
			sleep(1);
		rc = ioctl(h->vzfd, VZCTL_ENV_CREATE, &req);
		if (rc >= 0 || errno != EBUSY || attempt >= ENVRETRY)
			break;
	}
#ifdef __x86_64__
	/* Set personality PER_LINUX32 for i386 based VPSes */
	if (rc >= 0 && (flags & VE_ENTER))
		set_personality32();
#endif
	return rc;
}
/** Allocate and initialize a VPS handler.
 *
 * Opens VZCTLDEV, allocates a zero-filled handler and probes the kernel
 * with an env-create ioctl (id 0, no flags).  The probe is treated as
 * fatal only when it fails with ENOSYS or EPERM.
 *
 * @param veid		VPS id (not used by this function).
 * @return		handler or NULL on error.
 */
vps_handler *vz_open(envid_t veid)
{
	vps_handler *h;
	int fd = open(VZCTLDEV, O_RDWR);

	if (fd < 0) {
		logger(0, errno, "Unable to open %s", VZCTLDEV);
		logger(0, 0, "Please check that vzdev kernel module is loaded"
			" and you have sufficient permissions"
			" to access the file.");
		return NULL;
	}
	h = calloc(1, sizeof(*h));
	if (h != NULL) {
		h->vzfd = fd;
		if (vz_env_create_ioctl(h, 0, 0) >= 0 ||
		    (errno != ENOSYS && errno != EPERM))
			return h;
		logger(0, 0, "Your kernel lacks support for virtual"
			" environments or modules not loaded");
		free(h);
	}
	close(fd);
	return NULL;
}
/** Release a VPS handler.
 *
 * Closes the handler's control descriptor and frees the handler.
 * A NULL handler is accepted and ignored.
 *
 * @param h		VPS handler.
 */
void vz_close(vps_handler *h)
{
	if (h != NULL) {
		close(h->vzfd);
		free(h);
	}
}
/** Get VPS status.
 *
 * Probes the VPS with an env-create ioctl using VE_TEST.  A failure with
 * ESRCH or ENOTTY means "stopped"; any other failure is logged and the
 * VPS is reported as running.
 *
 * @param h		VPS handler.
 * @param veid		VPS id.
 * @return		1 - VPS is running
 *			0 - VPS is stopped.
 */
int vps_is_run(vps_handler *h, envid_t veid)
{
	int ret;

	/* No local named errno here: the checks below must read the real
	 * error indicator from <errno.h>.
	 */
	ret = vz_env_create_ioctl(h, veid, VE_TEST);
	if (ret < 0 && (errno == ESRCH || errno == ENOTTY))
		return 0;
	else if (ret < 0)
		logger(0, errno, "error on vz_env_create_ioctl(VE_TEST)");
	return 1;
}
/** Change root to the specified directory and reset signal handling.
 *
 * After chdir() and chroot() succeed, setsid() is called, the signal mask
 * is emptied and the default disposition is requested for every signal
 * number.  Failures of setsid(), sigprocmask() and sigaction() are
 * ignored.
 *
 * @param root		VPS root directory.
 * @return		0 on success, VZ_VE_ROOT_NOTSET if root is NULL,
 *			VZ_RESOURCE_ERROR if chdir() or chroot() fails.
 */
int vz_chroot(char *root)
{
	int i;
	sigset_t sigset;
	struct sigaction act;

	if (root == NULL) {
		logger(0, 0, "vz_chroot: VPS root is not specified");
		return VZ_VE_ROOT_NOTSET;
	}
	if (chdir(root)) {
		logger(0, errno, "unable to change dir to %s",
			root);
		return VZ_RESOURCE_ERROR;
	}
	if (chroot(root)) {
		logger(0, errno, "chroot %s failed", root);
		return VZ_RESOURCE_ERROR;
	}
	setsid();
	sigemptyset(&sigset);
	sigprocmask(SIG_SETMASK, &sigset, NULL);
	sigemptyset(&act.sa_mask);
	act.sa_handler = SIG_DFL;
	act.sa_flags = 0;
	/* NSIG is one more than the highest signal number, so the last
	 * valid signal is NSIG - 1.
	 */
	for (i = 1; i < NSIG; ++i)
		sigaction(i, &act, NULL);
	return 0;
}
/* Call setluid() for the given VPS id.
 *
 * @param veid		VPS id.
 * @return		0 on success, VZ_SETLUID_ERROR if setluid() returns -1.
 */
int vz_setluid(envid_t veid)
{
	return (setluid(veid) == -1) ? VZ_SETLUID_ERROR : 0;
}
/*
 * Checks if sysfs needs to be enabled for this VPS.
 * Currently this is done only when the OS template name or the
 * distribution name starts with one of the entries in sysfs_dists.
 * FIXME: provide a generic way to enable/disable sysfs per VPS
 *
 * Returns 1 if sysfs is required, 0 otherwise.
 */
static int sysfs_required(vps_res *res)
{
	static char *sysfs_dists[] = {"opensuse", "suse", "sles", NULL};
	tmpl_param *tmpl = &res->tmpl;
	char **entry;

	for (entry = sysfs_dists; *entry != NULL; entry++) {
		size_t prefix_len = strlen(*entry);

		/* prefix match against the OS template name */
		if (tmpl->ostmpl != NULL &&
		    strncmp(tmpl->ostmpl, *entry, prefix_len) == 0)
			return 1;
		/* prefix match against the distribution name */
		if (tmpl->dist != NULL &&
		    strncmp(tmpl->dist, *entry, prefix_len) == 0)
			return 1;
	}
	return 0;
}
/* When the headers define VE_FEATURE_SYSFS, _env_create() below builds its
 * request with struct env_create_param2 instead of struct env_create_param.
 */
#ifdef VE_FEATURE_SYSFS
/* Kernel understands new style env. create struct - with features etc. */
#define KERNEL_HAVE_ENV_CREATE_PARAM2
#endif
/* Create the environment and, once setup is reported as finished, exec
 * init.  This is the default creation callback used when the caller of
 * vz_real_env_create() supplies none.
 *
 * @param h		VPS handler; h->vzfd is closed after a successful create.
 * @param veid		VPS id.
 * @param wait_p	descriptor read to wait for the end of VPS setup.
 * @param err_p		descriptor the error code is written to when no init
 *			binary could be executed.
 * @param data		pointer to the vps_res describing the VPS.
 * @return		0 if the read on wait_p returned anything other than
 *			end-of-file (init is then not started), otherwise a
 *			VZ_* error code.  Does not return when init is
 *			executed successfully.
 */
static int _env_create(vps_handler *h, envid_t veid, int wait_p, int err_p,
	void *data)
{
	struct vzctl_env_create_data env_create_data;
#ifdef KERNEL_HAVE_ENV_CREATE_PARAM2
	struct env_create_param2 create_param;
#else
	struct env_create_param create_param;
#endif
	int fd, ret;
	vps_res *res;
	char *argv[] = {"init", NULL};
	char *envp[] = {"HOME=/", "TERM=linux", NULL};
	int retry = 0;

	res = (vps_res *) data;
	memset(&create_param, 0, sizeof(create_param));
	create_param.iptables_mask = get_ipt_mask(res->env.ipt_mask);
	logger(3, 0, "Set iptables mask %#10.8x", create_param.iptables_mask);
	env_create_data.veid = veid;
	env_create_data.class_id = 0;
	env_create_data.flags = VE_CREATE | VE_EXCLUSIVE;
	env_create_data.data = &create_param;
	env_create_data.datalen = sizeof(create_param);
#ifdef VE_FEATURE_SYSFS
	if (sysfs_required(res))
		create_param.feature_mask = VE_FEATURE_SYSFS;
#endif
try:
	ret = vz_env_create_data_ioctl(h, &env_create_data);
	if (ret < 0) {
		switch(errno) {
		case EINVAL:
			ret = VZ_ENVCREATE_ERROR;
#ifdef KERNEL_HAVE_ENV_CREATE_PARAM2
			/* Run-time kernel did not understand
			 * env_create_param2 -- so retry with
			 * the old env_create_param struct.
			 */
			env_create_data.datalen =
				sizeof(struct env_create_param);
			if (!retry++)
				goto try;
#endif
			break;
		case EACCES:
			/* License is not loaded */
			ret = VZ_NO_ACCES;
			break;
		case ENOTTY:
			/* Some vz modules are not present */
			ret = VZ_BAD_KERNEL;
			break;
		default:
			logger(0, errno, "env_create error");
			ret = VZ_ENVCREATE_ERROR;
			break;
		}
		goto env_err;
	}
	close(h->vzfd);
	/* Create /fastboot to skip run fsck.  O_CREAT requires an explicit
	 * mode argument; creation stays best-effort, so a failure is
	 * ignored.
	 */
	fd = open("/fastboot", O_CREAT | O_RDONLY, 0644);
	if (fd >= 0)
		close(fd);
	mk_reboot_script();
	if (res->dq.ugidlimit != NULL)
		mk_quota_link();
	/* Close status descriptor to report that
	 * environment is created.
	 */
	close(STDIN_FILENO);
	/* Now we wait until VPS setup will be done
	 * If no error start init otherwise exit.
	 */
	if (read(wait_p, &ret, sizeof(ret)) != 0)
		return 0;
	logger(10, 0, "Starting init");
	/* Each execve() returns only on failure, so the next location
	 * is tried in turn.
	 */
	execve("/sbin/init", argv, envp);
	execve("/etc/init", argv, envp);
	execve("/bin/init", argv, envp);
	ret = VZ_FS_BAD_TMPL;
	write(err_p, &ret, sizeof(ret));
env_err:
	return ret;
}
/* Enter the VPS root, set the luid and the resource limits, then fork the
 * process that actually creates the environment.
 *
 * The forked child sets capabilities, closes unneeded descriptors and
 * runs either the supplied callback or _env_create().  A non-zero result
 * is written to STDIN_FILENO before the child exits with that result.
 *
 * @param h		VPS handler.
 * @param veid		VPS id.
 * @param res		VPS resources.
 * @param wait_p	descriptor passed on to the creation callback.
 * @param err_p		descriptor passed on to the creation callback.
 * @param fn		optional creation callback; NULL selects _env_create().
 * @param data		argument for fn.
 * @return		0 in the parent once the child has been forked, or
 *			an error code if a preparatory step or fork() fails.
 */
static int vz_real_env_create(vps_handler *h, envid_t veid, vps_res *res,
	int wait_p, int err_p, env_create_FN fn, void *data)
{
	int rc;
	int child;

	rc = vz_chroot(res->fs.root);
	if (rc)
		return rc;
	rc = vz_setluid(veid);
	if (rc)
		return rc;
	rc = set_ublimit(h, veid, &res->ub);
	if (rc)
		return rc;
	/* Create another process to proper resource accounting */
	child = fork();
	if (child < 0) {
		logger(0, errno, "Unable to fork");
		return VZ_RESOURCE_ERROR;
	}
	if (child > 0)
		return 0;
	/* Only the forked child gets here. */
	rc = vps_set_cap(veid, &res->cap);
	if (rc == 0) {
		close_fds(0, wait_p, err_p, h->vzfd, -1);
		if (fn != NULL)
			rc = fn(h, veid, wait_p, err_p, data);
		else
			rc = _env_create(h, veid, wait_p, err_p, (void *)res);
	}
	if (rc)
		write(STDIN_FILENO, &rc, sizeof(rc));
	exit(rc);
}
/* Fork the process that creates the environment and wait for its status.
 *
 * The child gets the write end of a status pipe on STDIN_FILENO and runs
 * vz_real_env_create().  The parent closes its copies of wait_p[0] and
 * err_p[1] and reads the status pipe: end-of-file means success, any data
 * is taken as the error code.  SIGCHLD is ignored for the duration of the
 * call and its previous disposition is restored before returning.
 *
 * @param h		VPS handler.
 * @param veid		VPS id.
 * @param res		VPS resources.
 * @param wait_p	pipe; the child keeps the read end.
 * @param err_p		pipe; the child keeps the write end.
 * @param fn		optional creation callback, passed on unchanged.
 * @param data		argument for fn.
 * @return		0 on success or an error code.
 */
int vz_env_create(vps_handler *h, envid_t veid, vps_res *res,
	int wait_p[2], int err_p[2], env_create_FN fn, void *data)
{
	int ret, pid;
	int errcode = 0;
	ssize_t nread;
	int status_p[2];
	struct sigaction act, actold;

	if (check_var(res->fs.root, "VE_ROOT is not set"))
		return VZ_VE_ROOT_NOTSET;
	if (pipe(status_p) < 0) {
		logger(0, errno, "Can not create pipe");
		return VZ_RESOURCE_ERROR;
	}
	sigaction(SIGCHLD, NULL, &actold);
	sigemptyset(&act.sa_mask);
	act.sa_handler = SIG_IGN;
	act.sa_flags = SA_NOCLDSTOP;
	sigaction(SIGCHLD, &act, NULL);
	if ((pid = fork()) < 0) {
		logger(0, errno, "Can not fork");
		ret = VZ_RESOURCE_ERROR;
		goto err;
	} else if (pid == 0) {
		dup2(status_p[1], STDIN_FILENO);
		close(status_p[0]);
		close(status_p[1]);
		fcntl(STDIN_FILENO, F_SETFD, FD_CLOEXEC);
		fcntl(err_p[1], F_SETFD, FD_CLOEXEC);
		close(err_p[0]);
		fcntl(wait_p[0], F_SETFD, FD_CLOEXEC);
		close(wait_p[1]);
		ret = vz_real_env_create(h, veid, res, wait_p[0], err_p[1], fn,
			data);
		if (ret)
			write(STDIN_FILENO, &ret, sizeof(ret));
		exit(ret);
	}
	/* Wait for environment created */
	close(status_p[1]);
	/* remember that the write end is gone so it is not closed twice */
	status_p[1] = -1;
	close(wait_p[0]);
	close(err_p[1]);
	nread = read(status_p[0], &errcode, sizeof(errcode));
	if (nread < 0) {
		/* errcode holds nothing meaningful in this case */
		logger(0, errno, "Unable to read environment status");
		ret = VZ_RESOURCE_ERROR;
	} else if (nread == 0) {
		ret = 0;
	} else {
		ret = errcode;
		switch(ret) {
		case VZ_NO_ACCES:
			logger(0,0, "Permission denied");
			break;
		case VZ_BAD_KERNEL:
			logger(0, 0, "Invalid kernel, or some kernel"
				" modules are not loaded");
			break;
		case VZ_SET_CAP:
			logger(0, 0, "Unable to set capability");
			break;
		case VZ_RESOURCE_ERROR:
			logger(0, 0, "Not enough resources"
				" to start environment");
			break;
		}
	}
err:
	if (status_p[1] >= 0)
		close(status_p[1]);
	close(status_p[0]);
	sigaction(SIGCHLD, &actold, NULL);
	return ret;
}
/* Raise the UB_IPTENTRIES limits to MIN_NUMIPTENT when the smaller of the
 * two configured values is below that minimum.  Nothing is done when the
 * resource is not present in ub.
 */
static void fix_numiptent(ub_param *ub)
{
	ub_res *ipt = get_ub_res(ub, UB_IPTENTRIES);
	unsigned long lowest;

	if (ipt == NULL)
		return;
	lowest = min_ul(ipt->limit[0], ipt->limit[1]);
	if (lowest >= MIN_NUMIPTENT)
		return;
	/* NOTE(review): the limits are passed through min_ul() but printed
	 * with %d -- confirm the specifiers match the declared types.
	 */
	logger(0, 0, "Warning: NUMIPTENT %d:%d is less"
		" than minimally allowable value, set to %d:%d",
		ipt->limit[0], ipt->limit[1],
		MIN_NUMIPTENT, MIN_NUMIPTENT);
	ipt->limit[0] = MIN_NUMIPTENT;
	ipt->limit[1] = MIN_NUMIPTENT;
}
/* Provide a default for the CPU units when neither units nor weight is
 * configured: allocate cpu->units and set it to UNLCPUUNITS.  If the
 * allocation fails the failure is logged and cpu is left unchanged.
 */
static void fix_cpu(cpu_param *cpu)
{
	if (cpu->units != NULL || cpu->weight != NULL)
		return;
	cpu->units = malloc(sizeof(*cpu->units));
	if (cpu->units == NULL) {
		logger(0, errno, "Unable to allocate memory for cpu units");
		return;
	}
	*cpu->units = UNLCPUUNITS;
}
/** Start and configure VPS, optionally with a custom creation callback.
 *
 * Reads the distribution actions, (re)mounts the VPS root, creates the
 * environment through vz_env_create(), applies the resource setup, runs
 * the per-VPS start script and finally closes the write end of the wait
 * pipe so that init is started.  On failure a running VPS is killed,
 * quota is switched off and the root is unmounted.
 *
 * @param h		VPS handler.
 * @param veid		VPS id.
 * @param param		VPS parameters; param->res holds the resources.
 * @param skip		flags to skip VPS setup (SKIP_SETUP|SKIP_ACTION_SCRIPT)
 * @param mod		modules list, passed on to vps_setup_res().
 * @param fn		optional environment creation callback.
 * @param data		argument for fn.
 * @return		0 on success.
 */
int vps_start_custom(vps_handler *h, envid_t veid, vps_param *param,
	skipFlags skip, struct mod_action *mod,
	env_create_FN fn, void *data)
{
	int wait_p[2];
	int err_p[2];
	int ret;
	char buf[64];
	char *dist_name;
	struct sigaction act;
	vps_res *res = &param->res;
	dist_actions actions;

	memset(&actions, 0, sizeof(actions));
	dist_name = get_dist_name(&res->tmpl);
	ret = read_dist_actions(dist_name, DIST_DIR, &actions);
	free(dist_name);
	if (ret)
		return ret;
	/* From here on every exit goes through 'out' so that the
	 * distribution actions are released.
	 */
	if (check_var(res->fs.root, "VE_ROOT is not set")) {
		ret = VZ_VE_ROOT_NOTSET;
		goto out;
	}
	if (vps_is_run(h, veid)) {
		logger(0, 0, "VPS is already running");
		ret = VZ_VE_RUNNING;
		goto out;
	}
	logger(0, 0, "Starting VPS ...");
	if (vps_is_mounted(res->fs.root)) {
		/* if VPS mounted umount first, to cleanup mount state */
		vps_umount(h, veid, res->fs.root, skip);
	}
	if (!vps_is_mounted(res->fs.root)) {
		/* increase quota to perform setup */
		quouta_inc(&res->dq, 100);
		if ((ret = vps_mount(h, veid, &res->fs, &res->dq, skip)))
			goto out;
		quouta_inc(&res->dq, -100);
	}
	if (pipe(wait_p) < 0) {
		logger(0, errno, "Can not create pipe");
		ret = VZ_RESOURCE_ERROR;
		goto out;
	}
	if (pipe(err_p) < 0) {
		/* log first: close() may overwrite errno */
		logger(0, errno, "Can not create pipe");
		close(wait_p[0]);
		close(wait_p[1]);
		ret = VZ_RESOURCE_ERROR;
		goto out;
	}
	sigemptyset(&act.sa_mask);
	act.sa_handler = SIG_IGN;
	act.sa_flags = 0;
	sigaction(SIGPIPE, &act, NULL);
	fix_numiptent(&res->ub);
	fix_cpu(&res->cpu);
	if ((ret = vz_env_create(h, veid, res, wait_p, err_p, fn, data)))
		goto err;
	if ((ret = vps_setup_res(h, veid, &actions, &res->fs, param,
		STATE_STARTING, skip, mod)))
	{
		goto err;
	}
	if (!(skip & SKIP_ACTION_SCRIPT)) {
		snprintf(buf, sizeof(buf), VPS_CONF_DIR "%d.%s", veid,
			START_PREFIX);
		if (stat_file(buf)) {
			if (vps_exec_script(h, veid, res->fs.root, NULL, NULL,
				buf, NULL, 0))
			{
				ret = VZ_ACTIONSCRIPT_ERROR;
				goto err;
			}
		}
	}
	/* Close fd to start /sbin/init */
	if (close(wait_p[1]))
		logger(0, errno, "Unable to close fd to start init");
	/* mark as closed so the final cleanup does not close it again */
	wait_p[1] = -1;
err:
	if (ret) {
		/* Any data on the wait pipe tells the child not to start
		 * init; send the error code rather than an indeterminate
		 * value.
		 */
		int err = ret;

		/* Kill environment */
		logger(0, 0, "VPS start failed");
		write(wait_p[1], &err, sizeof(err));
	} else {
		if (!read(err_p[0], &ret, sizeof(ret))) {
			logger(0, 0, "VPS start in progress...");
		} else {
			if (ret == VZ_FS_BAD_TMPL)
				logger(0, 0, "Unable to start init, probably"
					" incorrect template");
			logger(0, 0, "VPS start failed");
		}
	}
	if (ret) {
		if (vps_is_run(h, veid))
			env_stop(h, veid, res->fs.root, M_KILL);
		vps_quotaoff(veid, &res->dq);
		if (vps_is_mounted(res->fs.root))
			vps_umount(h, veid, res->fs.root, skip);
	}
	/* NOTE(review): vz_env_create() closes wait_p[0] and err_p[1] in
	 * its parent path, so these two calls may hit descriptors that are
	 * already closed -- confirm before relying on them.
	 */
	close(wait_p[0]);
	if (wait_p[1] >= 0)
		close(wait_p[1]);
	close(err_p[0]);
	close(err_p[1]);
out:
	free_dist_actions(&actions);
	return ret;
}
/** Start and configure VPS.
 *
 * Same as vps_start_custom() with no creation callback and no callback
 * data.
 *
 * @param h		VPS handler.
 * @param veid		VPS id.
 * @param param		VPS parameters.
 * @param skip		flags to skip VPS setup (SKIP_SETUP|SKIP_ACTION_SCRIPT)
 * @param mod		modules list, passed on to vps_start_custom().
 * @return		0 on success.
 */
int vps_start(vps_handler *h, envid_t veid, vps_param *param,
	skipFlags skip, struct mod_action *mod)
{
	int rc = vps_start_custom(h, veid, param, skip, mod, NULL, NULL);

	return rc;
}
/* Enter the VPS and carry out the requested stop action.  Called from a
 * forked child by env_stop().
 *
 * M_REBOOT and M_HALT exec "reboot" and "halt" respectively; M_KILL sends
 * SIGTERM to every process it may signal, waits one second and then sends
 * SIGKILL to pid 1.
 *
 * @param h		VPS handler.
 * @param veid		VPS id.
 * @param vps_root	VPS root directory.
 * @param stop_mode	one of M_REBOOT, M_HALT, M_KILL.
 * @return		0 when the action was issued or the VPS does not
 *			exist (ESRCH on enter); otherwise the failing step's
 *			result.
 */
static int real_env_stop(vps_handler *h, envid_t veid, char *vps_root,
	int stop_mode)
{
	char *cmd = NULL;
	int rc;

	rc = vz_chroot(vps_root);
	if (rc)
		return rc;
	rc = vz_setluid(veid);
	if (rc)
		return rc;
	close_fds(1, h->vzfd, -1);
	rc = vz_env_create_ioctl(h, veid, VE_ENTER);
	if (rc < 0) {
		if (errno == ESRCH)
			return 0;
		logger(0, errno, "VPS enter failed");
		return rc;
	}
	close(h->vzfd);
	if (stop_mode == M_KILL) {
		kill(-1, SIGTERM);
		sleep(1);
		kill(1, SIGKILL);
		return 0;
	}
	if (stop_mode == M_REBOOT)
		cmd = "reboot";
	else if (stop_mode == M_HALT)
		cmd = "halt";
	if (cmd != NULL) {
		char *argv[2];

		argv[0] = cmd;
		argv[1] = NULL;
		/* returns only if the exec failed */
		execvep(argv[0], argv, NULL);
	}
	return 0;
}
/* Stop a running VPS and wait until it is gone.
 *
 * Unless stop_mode is M_KILL, a child is forked to run the requested stop
 * action and the VPS is polled once per second for up to MAX_SHTD_TM
 * iterations.  If it is still running (or M_KILL was requested), a second
 * child performs M_KILL and the VPS is polled every half second for up to
 * MAX_SHTD_TM iterations.  SIGCHLD is ignored for the duration of the
 * call and its previous disposition is restored before returning.
 *
 * @param h		VPS handler.
 * @param veid		VPS id.
 * @param root		VPS root directory.
 * @param stop_mode	one of M_REBOOT, M_HALT, M_KILL.
 * @return		0 on success, VZ_RESOURCE_ERROR if fork() fails,
 *			VZ_STOP_ERROR if the VPS is still running at the end.
 */
static int env_stop(vps_handler *h, envid_t veid, char *root, int stop_mode)
{
	struct sigaction act, actold;
	int i, pid, ret = 0;

	sigaction(SIGCHLD, NULL, &actold);
	sigemptyset(&act.sa_mask);
	act.sa_handler = SIG_IGN;
	act.sa_flags = SA_NOCLDSTOP;
	sigaction(SIGCHLD, &act, NULL);
	logger(0, 0, "Stopping VPS ...");
	if (stop_mode == M_KILL)
		goto kill_vps;
	if ((pid = fork()) < 0) {
		logger(0, errno, "Can not fork");
		ret = VZ_RESOURCE_ERROR;
		/* not a timeout: skip the summary message, as the second
		 * fork below does
		 */
		goto err;
	} else if (pid == 0) {
		ret = real_env_stop(h, veid, root, stop_mode);
		exit(ret);
	}
	for (i = 0; i < MAX_SHTD_TM; i++) {
		sleep(1);
		if (!vps_is_run(h, veid)) {
			ret = 0;
			goto out;
		}
	}
kill_vps:
	if ((pid = fork()) < 0) {
		ret = VZ_RESOURCE_ERROR;
		logger(0, errno, "Can not fork");
		goto err;
	} else if (pid == 0) {
		ret = real_env_stop(h, veid, root, M_KILL);
		exit(ret);
	}
	ret = VZ_STOP_ERROR;
	for (i = 0; i < MAX_SHTD_TM; i++) {
		usleep(500000);
		if (!vps_is_run(h, veid)) {
			ret = 0;
			break;
		}
	}
out:
	if (ret)
		logger(0, 0, "Unable to stop VPS, operation timed out");
	else
		logger(0, 0, "VPS was stopped");
err:
	sigaction(SIGCHLD, &actold, NULL);
	return ret;
}
/** Stop VPS.
 *
 * Runs the per-VPS stop script (unless skipped), stops the environment,
 * calls the module cleanup, removes the VPS addresses collected before
 * the stop (unless route cleanup is disabled), unmounts the VPS root and
 * runs the VPS_STOP pre-script.
 *
 * @param h		VPS handler.
 * @param veid		VPS id.
 * @param param		VPS parameters; param->res holds the resources.
 * @param stop_mode	stop mode one of (M_REBOOT M_HALT M_KILL).
 * @param skip		flags to skip run action script (SKIP_ACTION_SCRIPT)
 * @param action	modules list, used to call cleanup() callback.
 * @return		0 on success (also when the VPS is not running);
 *			otherwise the first error encountered.
 */
int vps_stop(vps_handler *h, envid_t veid, vps_param *param, int stop_mode,
	skipFlags skip, struct mod_action *action)
{
	int ret;
	int script_ret;
	list_head_t ips;
	char buf[64];
	vps_res *res = &param->res;

	list_head_init(&ips);
	if (check_var(res->fs.root, "VE_ROOT is not set"))
		return VZ_VE_ROOT_NOTSET;
	if (!vps_is_run(h, veid)) {
		logger(0, 0, "Unable to stop: VPS is not running");
		return 0;
	}
	if (!(skip & SKIP_ACTION_SCRIPT)) {
		snprintf(buf, sizeof(buf), VPS_CONF_DIR "%d.%s", veid,
			STOP_PREFIX);
		if (stat_file(buf)) {
			if (vps_exec_script(h, veid, res->fs.root, NULL, NULL,
				buf, NULL, 0))
			{
				return VZ_ACTIONSCRIPT_ERROR;
			}
		}
	}
	/* Collect the addresses while the VPS is still running */
	get_vps_ip(h, veid, &ips);
	if ((ret = env_stop(h, veid, res->fs.root, stop_mode)))
		goto out;
	mod_cleanup(h, veid, action, param);
	if (!res->net.skip_route_cleanup)
		run_net_script(veid, DEL, &ips, STATE_RUNNING);
	ret = vps_umount(h, veid, res->fs.root, skip);
	/* Clear VPS network configuration.  The script always runs, but an
	 * unmount error is not hidden by its result.
	 */
	script_ret = run_pre_script(veid, VPS_STOP);
	if (ret == 0)
		ret = script_ret;
out:
	/* release the address list on every path that filled it */
	free_str_param(&ips);
	return ret;
}