/*
 *  Copyright (C) 2000-2012, Parallels, Inc. All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <grp.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <linux/vzcalluser.h>
#include <linux/vzctl_veth.h>
#include <linux/vzctl_venet.h>

#include "env.h"
#include "util.h"
#include "types.h"
#include "logger.h"
#include "vzerror.h"
#include "vzsyscalls.h"

#define ENVRETRY	3

static int vz_env_create_ioctl(vps_handler *h, envid_t veid, int flags)
{
	struct vzctl_env_create env_create;
	int errcode;
	int retry = 0;

	memset(&env_create, 0, sizeof(env_create));
	env_create.veid = veid;
	env_create.flags = flags;
	do {
		if (retry)
			sleep(1);
		errcode = ioctl(h->vzfd, VZCTL_ENV_CREATE, &env_create);
	} while (errcode < 0 && errno == EBUSY && retry++ < ENVRETRY);
	if (errcode >= 0 && (flags & VE_ENTER)) {
		/* Clear supplementary group IDs */
		setgroups(0, NULL);
		/* Set personality PER_LINUX32 for i386 based CTs */
		set_personality32();
	}
	return errcode;
}

static int vz_is_run(vps_handler *h, envid_t veid)
{
	int ret = vz_env_create_ioctl(h, veid, VE_TEST);

	if (ret < 0 && (errno == ESRCH || errno == ENOTTY))
		return 0;
	else if (ret < 0)
		logger(-1, errno, "Error on vz_env_create_ioctl(VE_TEST)");
	return 1;
}

static int vz_enter(vps_handler *h, envid_t veid, const char *root, int flags)
{
	int ret;

	if ((ret = vz_chroot(root)))
		return ret;

	ret = vz_env_create_ioctl(h, veid, VE_ENTER | flags);
	if (ret < 0) {
		if (errno == ESRCH)
			ret = VZ_VE_NOT_RUNNING;
		else
			ret = VZ_ENVCREATE_ERROR;
	}
	else
		ret = 0;

	close(h->vzfd);
	return ret;
}

static int vz_destroy(vps_handler *h, envid_t veid)
{
	/* Destroys automatically after reboot */
	return 0;
}

static int vz_env_configure(int fd, envid_t veid, const char *osrelease)
{
	int ret = 0;
	struct vzctl_ve_configure *cparam;
	int len;

	len = strlen(osrelease) + 1;
	cparam = calloc(1, sizeof(struct vzctl_ve_configure) + len);
	if (cparam == NULL)
		return VZ_RESOURCE_ERROR;

	cparam->veid = veid;
	cparam->key = VE_CONFIGURE_OS_RELEASE;
	cparam->size = len;
	strcpy(cparam->data, osrelease);

	if (ioctl(fd, VZCTL_VE_CONFIGURE, cparam) != 0)
		if (errno != ENOTTY)
			ret = VZ_SET_OSRELEASE;

	free(cparam);

	return ret;
}

static int vz_env_create_data_ioctl(vps_handler *h,
	struct vzctl_env_create_data *data)
{
	int errcode;
	int retry = 0;

	do {
		if (retry)
			sleep(1);
		errcode = ioctl(h->vzfd, VZCTL_ENV_CREATE_DATA, data);
	} while (errcode < 0 && errno == EBUSY && retry++ < ENVRETRY);

	return errcode;
}

static int _env_create(vps_handler *h, void *data)
{
	struct vzctl_env_create_data env_create_data;
	struct env_create_param3 create_param;
	int ret;
	struct arg_start *arg = data;
	envid_t veid = arg->veid;
	int wait_p = arg->wait_p;
	int err_p = arg->err_p;

	fill_container_param(arg, &create_param);

	env_create_data.veid = veid;
	env_create_data.class_id = 0;
	env_create_data.flags = VE_CREATE | VE_EXCLUSIVE;
	env_create_data.data = &create_param;
	env_create_data.datalen = sizeof(create_param);

	/* Close all fds except stdin. stdin is status pipe */
	close(STDERR_FILENO); close(STDOUT_FILENO);
	close_fds(0, wait_p, err_p, h->vzfd, -1);

try:
	ret = vz_env_create_data_ioctl(h, &env_create_data);
	if (ret < 0) {
		switch(errno) {
			case EINVAL:
				ret = VZ_ENVCREATE_ERROR;
				/* Run-time kernel did not understand the
				 * latest create_param -- so retry with
				 * the old env_create_param structs.
				 */
				switch (env_create_data.datalen) {
				case sizeof(struct env_create_param3):
					env_create_data.datalen =
						sizeof(struct env_create_param2);
					goto try;
				case sizeof(struct env_create_param2):
					env_create_data.datalen =
						sizeof(struct env_create_param);
					goto try;
				}
				break;
			case EACCES:
			/* License is not loaded */
				ret = VZ_NO_ACCES;
				break;
			case ENOTTY:
			/* Some vz modules are not present */
				ret = VZ_BAD_KERNEL;
				break;
			default:
				logger(-1, errno, "env_create error");
				ret = VZ_ENVCREATE_ERROR;
				break;
		}
		return ret;
	}

	if (arg->res->env.osrelease != NULL) {
		ret = vz_env_configure(h->vzfd, veid,
				arg->res->env.osrelease);
		if (ret != 0)
			return ret;
	}

	close(h->vzfd);
	return exec_container_init(arg, &create_param);
}

static inline int setluid(uid_t uid)
{
	return syscall(__NR_setluid, uid);
}

static int vz_setluid(envid_t veid)
{
	if (setluid(veid) == -1) {
		if (errno == ENOSYS)
			logger(-1, 0, "Error: kernel does not support"
				" user resources. Please, rebuild with"
				" CONFIG_USER_RESOURCE=y");
		return VZ_SETLUID_ERROR;
	}
	return 0;
}

static int vz_do_env_create(struct arg_start *arg)
{
	int ret, pid;
	int wait_p = arg->wait_p;
	int old_wait_p = arg->old_wait_p;
	int err_p = arg->err_p;
	env_create_FN fn = arg->fn;
	void *data = arg->data;
	struct vps_res *res = arg->res;
	vps_handler *h = arg->h;
	envid_t veid = arg->veid;

	if ((ret = vz_chroot(res->fs.root)))
		return ret;
	if ((ret = vz_setluid(veid)))
		return ret;
	if ((ret = set_ublimit(h, veid, &res->ub)))
		return ret;
	/* Create another process for proper resource accounting */
	if ((pid = fork()) < 0) {
		logger(-1, errno, "Unable to fork");
		return VZ_RESOURCE_ERROR;
	} else if (pid == 0) {
		if ((ret = vps_set_cap(veid, &res->env, &res->cap)))
			goto env_err;
		if (fn == NULL) {
			ret = _env_create(h, (void *)arg);
		} else {
			ret = fn(h, veid, wait_p, old_wait_p, err_p, data);
		}
env_err:
		if (ret)
			write(STDIN_FILENO, &ret, sizeof(ret));
		exit(ret);
	}
	return 0;
}

static int vz_setcpu(vps_handler *h, envid_t veid, cpu_param *cpu)
{
	int ret = 0;

	if (cpu->limit != NULL)
		ret = set_cpulimit(veid, *cpu->limit);

	if (cpu->units != NULL)
		ret = set_cpuunits(veid, *cpu->units);
	else if (cpu->weight != NULL)
		ret = set_cpuweight(veid, *cpu->weight);

	if (cpu->vcpus != NULL)
		ret = env_set_vcpus(veid, *cpu->vcpus);
	if (cpu->mask != NULL)
		ret = set_cpumask(veid, cpu->mask);
	return ret;
}

static int vz_set_devperm(vps_handler *h, envid_t veid, dev_res *dev)
{
	struct vzctl_setdevperms devperms;

	devperms.veid = veid;
	devperms.dev = dev->dev;
	devperms.mask = dev->mask;
	devperms.type = dev->type;

	if (ioctl(h->vzfd, VZCTL_SETDEVPERMS, &devperms)) {
		logger(-1, errno, "Error setting device permissions");
		return VZ_SET_DEVICES;
	}

	return 0;
}

static int vz_netdev_ctl(vps_handler *h, envid_t veid, int op, char *name)
{
	struct vzctl_ve_netdev ve_netdev;

	ve_netdev.veid = veid;
	ve_netdev.op = op;
	ve_netdev.dev_name = name;
	if (ioctl(h->vzfd, VZCTL_VE_NETDEV, &ve_netdev) < 0)
		return VZ_NETDEV_ERROR;
	return 0;
}

static int vz_ip_ctl(vps_handler *h, envid_t veid, int op, const char *ipstr)
{
	struct vzctl_ve_ip_map ip_map;
	int family;
	unsigned int ip[4];
	int ret;

	union {
		struct sockaddr_in  a4;
		struct sockaddr_in6 a6;
	} addr;

	if ((family = get_netaddr(ipstr, ip)) < 0)
		return 0;


	if (family == AF_INET) {
		addr.a4.sin_family = AF_INET;
		addr.a4.sin_addr.s_addr = ip[0];
		addr.a4.sin_port = 0;
		ip_map.addrlen = sizeof(addr.a4);
	} else if (family == AF_INET6) {
		addr.a6.sin6_family = AF_INET6;
		memcpy(&addr.a6.sin6_addr, ip, 16);
		addr.a6.sin6_port = 0;
		ip_map.addrlen = sizeof(addr.a6);
	} else {
		return -EAFNOSUPPORT;
	}

	ip_map.veid = veid;
	ip_map.op = op;
	ip_map.addr = (struct sockaddr*) &addr;

	ret = ioctl(h->vzfd, VENETCTL_VE_IP_MAP, &ip_map);
	if (ret) {
		switch (errno) {
			case EADDRINUSE:
				ret = VZ_IP_INUSE;
				break;
			case ESRCH:
				ret = VZ_VE_NOT_RUNNING;
				break;
			case EADDRNOTAVAIL:
				if (op == VE_IP_DEL)
					return 0;
				ret = VZ_IP_NA;
				break;
			default:
				ret = VZ_CANT_ADDIP;
				break;
		}
		logger(-1, errno, "Unable to %s IP %s",
			op == VE_IP_ADD ? "add" : "del", ipstr);
	}
	return ret;

}

static int veth_dev_mac_filter(vps_handler *h, envid_t veid, veth_dev *dev)
{
	struct vzctl_ve_hwaddr veth;
	int ret;

	veth.op = dev->mac_filter == YES ? VE_ETH_DENY_MAC_CHANGE :
							VE_ETH_ALLOW_MAC_CHANGE;
	veth.veid = veid;
	memcpy(veth.dev_name, dev->dev_name, IFNAMSIZE);
	memcpy(veth.dev_name_ve, dev->dev_name_ve, IFNAMSIZE);
	ret = ioctl(h->vzfd, VETHCTL_VE_HWADDR, &veth);
	if (ret) {
		if (errno != ENODEV) {
			logger(-1, errno, "Unable to set mac filter");
			ret = VZ_VETH_ERROR;
		} else
			ret = 0;
	}
	return ret;
}

static int veth_dev_create(vps_handler *h, envid_t veid, veth_dev *dev)
{
	struct vzctl_ve_hwaddr veth;

	if (!dev->dev_name[0] || dev->addrlen != ETH_ALEN)
		return VZ_VETH_ERROR;
	if (dev->addrlen_ve != 0 && dev->addrlen_ve != ETH_ALEN)
		return VZ_VETH_ERROR;
	veth.op = VE_ETH_ADD;
	veth.veid = veid;
	veth.addrlen = dev->addrlen;
	veth.addrlen_ve = dev->addrlen_ve;
	memcpy(veth.dev_addr, dev->dev_addr, ETH_ALEN);
	memcpy(veth.dev_addr_ve, dev->dev_addr_ve, ETH_ALEN);
	memcpy(veth.dev_name, dev->dev_name, IFNAMSIZE);
	memcpy(veth.dev_name_ve, dev->dev_name_ve, IFNAMSIZE);
	if (ioctl(h->vzfd, VETHCTL_VE_HWADDR, &veth) != 0) {
		if (errno == ENOTTY) {
			logger(-1, 0, "Error: veth feature is"
				" not supported by kernel");
			logger(-1, 0, "Please check that vzethdev"
				" kernel module is loaded");
		} else {
			logger(-1, errno, "Unable to create veth");
		}
		return VZ_VETH_ERROR;
	}

	return 0;
}

static int veth_dev_remove(vps_handler *h, envid_t veid, veth_dev *dev)
{
	struct vzctl_ve_hwaddr veth;
	int ret;

	if (!dev->dev_name[0])
		return EINVAL;
	veth.op = VE_ETH_DEL;
	veth.veid = veid;
	memcpy(veth.dev_name, dev->dev_name, IFNAMSIZE);
	ret = ioctl(h->vzfd, VETHCTL_VE_HWADDR, &veth);
	if (ret) {
		if (errno != ENODEV) {
			logger(-1, errno, "Unable to remove veth");
			ret = VZ_VETH_ERROR;
		} else
			ret = 0;
	}
	return ret;
}

static int vz_veth_ctl(vps_handler *h, envid_t veid, int op, veth_dev *dev)
{
	int ret = 0;
	if (op == ADD) {
		if (!dev->active) {
			if ((ret = veth_dev_create(h, veid, dev)))
				return ret;
		}
		dev->flags = 1;
		if (dev->mac_filter) {
			if ((ret = veth_dev_mac_filter(h, veid, dev)))
				return ret;
		}
	} else {
		if (!dev->active)
			return ret;
		ret = veth_dev_remove(h, veid, dev);
	}
	return ret;
}

int vz_do_open(vps_handler *h)
{
	if ((h->vzfd = open(VZCTLDEV, O_RDWR)) < 0) {
		logger(-1, errno, "Unable to open %s", VZCTLDEV);
		logger(-1, 0, "Please check that vzdev kernel module is loaded"
			" and you have sufficient permissions"
			" to access the file.");
		return -1;
	}

	if (vz_env_create_ioctl(h, 0, 0) < 0 &&
		(errno == ENOSYS || errno == EPERM))
	{
		logger(-1, 0, "Your kernel lacks support for virtual"
			" environments or modules not loaded");
		goto err;
	}

	h->is_run = vz_is_run;
	h->enter = vz_enter;
	h->destroy = vz_destroy;
	h->env_create = vz_do_env_create;
	h->setlimits = set_ublimit;
	h->setcpus = vz_setcpu;
	h->setcontext = vz_setluid;
	h->setdevperm = vz_set_devperm;
	h->netdev_ctl = vz_netdev_ctl;
	h->ip_ctl = vz_ip_ctl;
	h->veth_ctl = vz_veth_ctl;

	return 0;
err:
	close(h->vzfd);
	return -1;
}
