/*
 * Copyright (C) 2004-2006 Kay Sievers <kay.sievers@vrfy.org>
 * Copyright (C) 2004 Chris Friesen <chris_friesen@sympatico.ca>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License as published by the
 *	Free Software Foundation version 2 of the License.
 *
 *	This program is distributed in the hope that it will be useful, but
 *	WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *	General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License along
 *	with this program; if not, write to the Free Software Foundation, Inc.,
 *	51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 */

#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>
#include <syslog.h>
#include <time.h>
#include <getopt.h>
#include <sys/select.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/netlink.h>

#include "udev.h"
#include "udev_rules.h"
#include "udevd.h"
#include "udev_selinux.h"

/* set by --debug-trace: events are run one at a time and DEBUG=1 is exported */
static int debug_trace;
/* set by --debug: log_message() prints to stdout instead of syslog */
static int debug;

/* rules handed to udev_rules_init() and used for every event */
static struct udev_rules rules;
/* control socket, see init_udevd_socket() */
static int udevd_sock = -1;
/* kernel uevent socket, see init_uevent_netlink_sock() */
static int uevent_netlink_sock = -1;
/* inotify descriptor watching the rules directories, -1 if unavailable */
static int inotify_fd = -1;
/* session id returned by setsid() in main() */
static pid_t sid;

/* sig_handler() writes to this pipe to wake up select() in the main loop */
static int signal_pipe[2] = {-1, -1};
/* flags set by sig_handler() and consumed by the main loop */
static volatile int sigchilds_waiting;
static volatile int udev_exit;
static volatile int reload_config;
/* request to run msg_queue_manager() on the next main loop iteration */
static int run_exec_q;
/* toggled by the STOP_EXEC_QUEUE/START_EXEC_QUEUE control messages */
static int stop_exec_q;
/* limit passed to devpath_busy() for the number of entries in running_list */
static int max_childs;
/* msg_queue_manager() delays events once this many processes are running */
static int max_childs_running;
/* storage for the "UDEV_LOG=<priority>" string handed to putenv() */
static char udev_log[32];

/* events waiting to be executed */
static LIST_HEAD(exec_list);
/* events with a forked event process */
static LIST_HEAD(running_list);


#ifdef USE_LOG
/*
 * Emit a log message. Messages with a priority value above
 * udev_log_priority are dropped. In debug mode the message goes to
 * stdout prefixed with our pid, otherwise it is handed to syslog.
 */
void log_message(int priority, const char *format, ...)
{
	va_list ap;

	if (priority > udev_log_priority)
		return;

	va_start(ap, format);
	if (!debug) {
		vsyslog(priority, format, ap);
	} else {
		printf("[%d] ", (int) getpid());
		vprintf(format, ap);
	}
	va_end(ap);
}

#endif

/*
 * Signal handler installed in the forked event process.
 *
 * SIGALRM is raised by the alarm() armed in udev_event_process(); the
 * process terminates with status 1. _exit() is used instead of exit()
 * because exit() runs atexit handlers and flushes stdio, neither of which
 * is async-signal-safe.
 */
static void asmlinkage udev_event_sig_handler(int signum)
{
	if (signum == SIGALRM)
		_exit(1);
}

static int udev_event_process(struct udevd_uevent_msg *msg)
{
	struct sigaction act;
	struct udevice *udev;
	int i;
	int retval;

	/* set signal handlers */
	memset(&act, 0x00, sizeof(act));
	act.sa_handler = (void (*)(int)) udev_event_sig_handler;
	sigemptyset (&act.sa_mask);
	act.sa_flags = 0;
	sigaction(SIGALRM, &act, NULL);

	/* reset to default */
	act.sa_handler = SIG_DFL;
	sigaction(SIGINT, &act, NULL);
	sigaction(SIGTERM, &act, NULL);
	sigaction(SIGCHLD, &act, NULL);
	sigaction(SIGHUP, &act, NULL);

	/* trigger timeout to prevent hanging processes */
	alarm(UDEV_EVENT_TIMEOUT);

	/* reconstruct event environment from message */
	for (i = 0; msg->envp[i]; i++)
		putenv(msg->envp[i]);

	udev = udev_device_init(NULL);
	if (udev == NULL)
		return -1;
	strlcpy(udev->action, msg->action, sizeof(udev->action));
	sysfs_device_set_values(udev->dev, msg->devpath, msg->subsystem, msg->driver);
	udev->devpath_old = msg->devpath_old;
	udev->devt = msg->devt;

	retval = udev_device_event(&rules, udev);

	/* rules may change/disable the timeout */
	if (udev->event_timeout >= 0)
		alarm(udev->event_timeout);

	/* run programs collected by RUN-key*/
	if (retval == 0 && !udev->ignore_device && udev_run)
		retval = udev_rules_run(udev);

	udev_device_cleanup(udev);
	return retval;
}

/* states export_event_state() can record in the filesystem for an event */
enum event_state {
	EVENT_QUEUED,	/* event was received, queue symlink is created */
	EVENT_FINISHED,	/* event is done, queue file is removed */
	EVENT_FAILED,	/* queue file is moved to the failed directory */
};

/*
 * Record the state of an event in the filesystem below udev_root.
 *
 * Two paths are used: the queue file, named after the sequence number
 * below EVENT_QUEUE_DIR, and the failed file, named after the encoded
 * devpath below EVENT_FAILED_DIR.
 *
 *   EVENT_QUEUED:   remove a stale failed file, create the queue symlink
 *   EVENT_FINISHED: remove the queue file; the failed file is removed, or
 *                   for an event with devpath_old renamed to the new name
 *   EVENT_FAILED:   move the queue file to the failed file
 *
 * Nothing is done if the failed directory prefix does not fit into
 * PATH_SIZE, as the encoded name could not be located in the buffer then.
 */
static void export_event_state(struct udevd_uevent_msg *msg, enum event_state state)
{
	char filename[PATH_SIZE];
	char filename_failed[PATH_SIZE];
	size_t start;

	/* location of queue file */
	snprintf(filename, sizeof(filename), "%s/"EVENT_QUEUE_DIR"/%llu", udev_root, msg->seqnum);

	/* location of failed file */
	strlcpy(filename_failed, udev_root, sizeof(filename_failed));
	strlcat(filename_failed, "/", sizeof(filename_failed));
	start = strlcat(filename_failed, EVENT_FAILED_DIR"/", sizeof(filename_failed));
	if (start >= sizeof(filename_failed)) {
		/* truncated: 'start' would point past the end of the buffer */
		err("path of failed directory too long, state of seq %llu not exported\n", msg->seqnum);
		return;
	}
	strlcat(filename_failed, msg->devpath, sizeof(filename_failed));
	path_encode(&filename_failed[start], sizeof(filename_failed) - start);

	switch (state) {
	case EVENT_QUEUED:
		unlink(filename_failed);
		delete_path(filename_failed);

		create_path(filename);
		selinux_setfscreatecon(filename, NULL, S_IFLNK);
		symlink(msg->devpath, filename);
		selinux_resetfscreatecon();
		break;
	case EVENT_FINISHED:
		if (msg->devpath_old != NULL) {
			/* "move" event - rename failed file to current name, do not delete failed */
			char filename_failed_old[PATH_SIZE];

			/* same prefix and buffer size as above, so 'start' is in bounds here too */
			strlcpy(filename_failed_old, udev_root, sizeof(filename_failed_old));
			strlcat(filename_failed_old, "/", sizeof(filename_failed_old));
			start = strlcat(filename_failed_old, EVENT_FAILED_DIR"/", sizeof(filename_failed_old));
			strlcat(filename_failed_old, msg->devpath_old, sizeof(filename_failed_old));
			path_encode(&filename_failed_old[start], sizeof(filename_failed_old) - start);

			if (rename(filename_failed_old, filename_failed) == 0)
				info("renamed devpath, moved failed state of '%s' to '%s'\n",
				     msg->devpath_old, msg->devpath);
		} else {
			unlink(filename_failed);
			delete_path(filename_failed);
		}

		unlink(filename);
		delete_path(filename);
		break;
	case EVENT_FAILED:
		/* move failed event to the failed directory */
		create_path(filename_failed);
		rename(filename, filename_failed);

		/* clean up possibly empty queue directory */
		delete_path(filename);
		break;
	}

	return;
}

static void msg_queue_delete(struct udevd_uevent_msg *msg)
{
	list_del(&msg->node);

	/* mark as failed, if "add" event returns non-zero */
	if (msg->exitstatus && strcmp(msg->action, "add") == 0)
		export_event_state(msg, EVENT_FAILED);
	else
		export_event_state(msg, EVENT_FINISHED);

	free(msg);
}

/*
 * Fork an event process for the given message.
 *
 * The child closes the daemon's descriptors, handles the event and exits
 * with 0 or 1. The parent stores the child's pid in the message. If the
 * fork fails, the message is removed from its list and freed.
 */
static void udev_event_run(struct udevd_uevent_msg *msg)
{
	int retval;
	pid_t pid = fork();

	if (pid == -1) {
		err("fork of child failed: %s\n", strerror(errno));
		msg_queue_delete(msg);
		return;
	}

	if (pid != 0) {
		/* parent: get SIGCHLD in main loop */
		info("seq %llu forked, pid [%d], '%s' '%s', %ld seconds old\n",
		     msg->seqnum, pid,  msg->action, msg->subsystem, time(NULL) - msg->queue_time);
		msg->pid = pid;
		return;
	}

	/* child: drop everything inherited from the daemon */
	close(uevent_netlink_sock);
	close(udevd_sock);
	if (inotify_fd >= 0)
		close(inotify_fd);
	close(signal_pipe[READ_END]);
	close(signal_pipe[WRITE_END]);
	logging_close();

	logging_init("udevd-event");
	setpriority(PRIO_PROCESS, 0, UDEV_PRIORITY);

	retval = udev_event_process(msg);
	info("seq %llu finished with %i\n", msg->seqnum, retval);

	logging_close();
	exit(retval ? 1 : 0);
}

/*
 * Accept a new event: stamp it, store its sequence number in the
 * EVENT_SEQNUM file, export it as queued, and either run it right away
 * (debug-trace mode, or events carrying a timeout) or append it to the
 * exec list for msg_queue_manager().
 */
static void msg_queue_insert(struct udevd_uevent_msg *msg)
{
	char seqnum_file[PATH_SIZE];
	int fd;

	msg->queue_time = time(NULL);

	strlcpy(seqnum_file, udev_root, sizeof(seqnum_file));
	strlcat(seqnum_file, "/" EVENT_SEQNUM, sizeof(seqnum_file));
	fd = open(seqnum_file, O_WRONLY|O_TRUNC|O_CREAT, 0644);
	if (fd >= 0) {
		char text[32];
		int text_len = sprintf(text, "%llu\n", msg->seqnum);

		write(fd, text, text_len);
		close(fd);
	}

	export_event_state(msg, EVENT_QUEUED);
	info("seq %llu queued, '%s' '%s'\n", msg->seqnum, msg->action, msg->subsystem);

	if (debug_trace || msg->timeout != 0) {
		/* bypass the exec queue */
		list_add_tail(&msg->node, &running_list);
		udev_event_run(msg);

		/* run one event after the other in debug mode */
		if (debug_trace) {
			waitpid(msg->pid, NULL, 0);
			msg_queue_delete(msg);
		}
		return;
	}

	list_add_tail(&msg->node, &exec_list);
	run_exec_q = 1;
}

/*
 * Return the value of the "MemTotal:" line of /proc/meminfo divided by
 * 1024, or -1 if the file cannot be opened or has no such line.
 */
static int mem_size_mb(void)
{
	char line[4096];
	long int total = -1;
	FILE *meminfo = fopen("/proc/meminfo", "r");

	if (meminfo == NULL)
		return -1;

	for (;;) {
		long int kb;

		if (fgets(line, sizeof(line), meminfo) == NULL)
			break;
		if (sscanf(line, "MemTotal: %ld kB", &kb) != 1)
			continue;

		total = kb / 1024;
		break;
	}

	fclose(meminfo);
	return total;
}

/*
 * Count the per-cpu lines ("cpu" followed directly by a digit) in
 * /proc/stat.
 *
 * Returns the number of such lines, or -1 if the file cannot be opened
 * or no such line was found.
 */
static int cpu_count(void)
{
	FILE* f;
	char buf[4096];
	int count = 0;

	f = fopen("/proc/stat", "r");
	if (f == NULL)
		return -1;

	while (fgets(buf, sizeof(buf), f) != NULL) {
		/* ctype functions require a value representable as unsigned char */
		if (strncmp(buf, "cpu", 3) == 0 && isdigit((unsigned char) buf[3]))
			count++;
	}

	fclose(f);
	if (count == 0)
		return -1;
	return count;
}

/*
 * Return the value of the "procs_running" line of /proc/stat, or -1 if
 * the file cannot be opened or has no such line.
 */
static int running_processes(void)
{
	FILE* f;
	char buf[4096];
	int running = -1;

	f = fopen("/proc/stat", "r");
	if (f == NULL)
		return -1;

	while (fgets(buf, sizeof(buf), f) != NULL) {
		/* %u requires a pointer to unsigned int */
		unsigned int value;

		if (sscanf(buf, "procs_running %u", &value) == 1) {
			running = (int) value;
			break;
		}
	}

	fclose(f);
	return running;
}

/*
 * Return the number of processes in state 'R' that belong to the given
 * session, as read from /proc/<pid>/stat.
 *
 * If limit is greater than zero, counting stops as soon as limit
 * processes have been found. Returns -1 if /proc cannot be opened.
 */
static int running_processes_in_session(pid_t session, int limit)
{
	DIR *dir;
	struct dirent *dent;
	int running = 0;

	dir = opendir("/proc");
	if (!dir)
		return -1;

	/* read process info from /proc */
	for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
		int f;
		char procdir[64];
		char line[256];
		const char *pos;
		char state;
		/* plain ints, as that is what %d stores into */
		int ppid, pgrp, sess;
		int len;

		if (!isdigit((unsigned char) dent->d_name[0]))
			continue;

		snprintf(procdir, sizeof(procdir), "/proc/%s/stat", dent->d_name);
		procdir[sizeof(procdir)-1] = '\0';

		f = open(procdir, O_RDONLY);
		if (f == -1)
			continue;

		len = read(f, line, sizeof(line)-1);
		close(f);

		if (len <= 0)
			continue;
		line[len] = '\0';

		/*
		 * skip ugly program name; test the result of strrchr() before
		 * doing arithmetic on it, and never step over the terminator
		 */
		pos = strrchr(line, ')');
		if (pos == NULL || pos[1] == '\0')
			continue;
		pos += 2;

		if (sscanf(pos, "%c %d %d %d ", &state, &ppid, &pgrp, &sess) != 4)
			continue;

		/* count only processes in our session */
		if ((pid_t) sess != session)
			continue;

		/* count only running, no sleeping processes */
		if (state != 'R')
			continue;

		running++;
		if (limit > 0 && running >= limit)
			break;
	}
	closedir(dir);

	return running;
}

/*
 * Compare the devpath of a running event with the one of a waiting event,
 * looking at no more than PATH_SIZE characters.
 *
 * Returns 1 if both are identical, 2 if 'running' ends where 'waiting'
 * continues with '/', 3 if 'waiting' ends where 'running' continues with
 * '/', and 0 otherwise.
 */
static int compare_devpath(const char *running, const char *waiting)
{
	int pos = 0;

	while (pos < PATH_SIZE) {
		const char r = running[pos];
		const char w = waiting[pos];

		if (r == '\0') {
			/* identical device event found */
			if (w == '\0')
				return 1;
			/* parent device event found */
			if (w == '/')
				return 2;
		} else if (r == '/' && w == '\0') {
			/* child device event found */
			return 3;
		}

		/* no matching event */
		if (r != w)
			return 0;

		pos++;
	}

	return 0;
}

/*
 * Look for an event that must finish before 'msg' may run.
 *
 * Earlier events in the exec list and all events in the running list are
 * checked. Returns 0 if nothing blocks the event, otherwise:
 *   1  more running events than 'limit' were seen (only if limit != 0)
 *   2  an event for our old devpath exists
 *   3  an event for the identical, a parent or a child devpath exists
 *   4  an event for the same major:minor and subsystem exists
 *   5  an event for our physical device exists ("add" events only)
 */
static int devpath_busy(struct udevd_uevent_msg *msg, int limit)
{
	struct udevd_uevent_msg *other;
	int childs_count = 0;

	/* check exec-queue which may still contain delayed events we depend on */
	list_for_each_entry(other, &exec_list, node) {
		/* skip ourself and all later events */
		if (other->seqnum >= msg->seqnum)
			break;

		/* check our old name */
		if (msg->devpath_old != NULL && strcmp(other->devpath , msg->devpath_old) == 0)
			return 2;

		/* check identical, parent, or child device event */
		if (compare_devpath(other->devpath, msg->devpath) != 0) {
			dbg("%llu, device event still pending %llu (%s)\n",
			    msg->seqnum, other->seqnum, other->devpath);
			return 3;
		}

		/* check for our major:minor number */
		if (msg->devt && other->devt == msg->devt &&
		    strcmp(msg->subsystem, other->subsystem) == 0) {
			dbg("%llu, device event still pending %llu (%d:%d)\n", msg->seqnum,
			    other->seqnum, major(other->devt), minor(other->devt));
			return 4;
		}

		/* check physical device event (special case of parent) */
		if (msg->physdevpath && msg->action && strcmp(msg->action, "add") == 0 &&
		    compare_devpath(other->devpath, msg->physdevpath) != 0) {
			dbg("%llu, physical device event still pending %llu (%s)\n",
			    msg->seqnum, other->seqnum, other->devpath);
			return 5;
		}
	}

	/* check run queue for still running events */
	list_for_each_entry(other, &running_list, node) {
		if (limit && childs_count++ > limit) {
			dbg("%llu, maximum number (%i) of childs reached\n", msg->seqnum, childs_count);
			return 1;
		}

		/* check our old name */
		if (msg->devpath_old != NULL && strcmp(other->devpath , msg->devpath_old) == 0)
			return 2;

		/* check identical, parent, or child device event */
		if (compare_devpath(other->devpath, msg->devpath) != 0) {
			dbg("%llu, device event still running %llu (%s)\n",
			    msg->seqnum, other->seqnum, other->devpath);
			return 3;
		}

		/* check for our major:minor number */
		if (msg->devt && other->devt == msg->devt &&
		    strcmp(msg->subsystem, other->subsystem) == 0) {
			dbg("%llu, device event still running %llu (%d:%d)\n", msg->seqnum,
			    other->seqnum, major(other->devt), minor(other->devt));
			return 4;
		}

		/* check physical device event (special case of parent) */
		if (msg->physdevpath && msg->action && strcmp(msg->action, "add") == 0 &&
		    compare_devpath(other->devpath, msg->physdevpath) != 0) {
			dbg("%llu, physical device event still running %llu (%s)\n",
			    msg->seqnum, other->seqnum, other->devpath);
			return 5;
		}
	}

	return 0;
}

/*
 * Walk the exec list and start every event that is not blocked by
 * devpath_busy(). The walk stops early when the number of running
 * processes in our session reaches max_childs_running.
 */
static void msg_queue_manager(void)
{
	struct udevd_uevent_msg *cur;
	struct udevd_uevent_msg *next;
	int active;

	if (list_empty(&exec_list))
		return;

	active = running_processes();
	dbg("%d processes runnning on system\n", active);
	/* unknown system load: force the per-session check below */
	if (active < 0)
		active = max_childs_running;

	list_for_each_entry_safe(cur, next, &exec_list, node) {
		/* check running processes in our session and possibly throttle */
		if (active >= max_childs_running) {
			active = running_processes_in_session(sid, max_childs_running+10);
			dbg("at least %d processes running in session\n", active);
			if (active >= max_childs_running) {
				dbg("delay seq %llu, too many processes already running\n", cur->seqnum);
				return;
			}
		}

		/* serialize and wait for parent or child events */
		if (devpath_busy(cur, max_childs) != 0) {
			dbg("delay seq %llu (%s)\n", cur->seqnum, cur->devpath);
			continue;
		}

		/* move event to run list */
		list_move_tail(&cur->node, &running_list);
		udev_event_run(cur);
		active++;
		dbg("moved seq %llu to running list\n", cur->seqnum);
	}
}

/*
 * Build an event message from a buffer of NUL-separated "KEY=value"
 * strings.
 *
 * The buffer is copied behind the message struct and envp[] is pointed
 * into the copy. Well-known keys are remembered in the message, and
 * "UDEVD_EVENT=1" is appended to the environment.
 *
 * Returns a malloc()ed message the caller owns, or NULL if buf_size is
 * negative, memory is exhausted, or DEVPATH or ACTION is missing.
 */
static struct udevd_uevent_msg *get_msg_from_envbuf(const char *buf, int buf_size)
{
	int bufpos;
	int i;
	struct udevd_uevent_msg *msg;
	char *physdevdriver_key = NULL;
	int maj = 0;
	int min = 0;

	if (buf_size < 0)
		return NULL;

	/*
	 * One extra byte, zeroed by the memset, terminates the copied buffer,
	 * so strlen() on the last key can never run past the allocation.
	 */
	msg = malloc(sizeof(struct udevd_uevent_msg) + buf_size + 1);
	if (msg == NULL)
		return NULL;
	memset(msg, 0x00, sizeof(struct udevd_uevent_msg) + buf_size + 1);

	/* copy environment buffer and reconstruct envp */
	memcpy(msg->envbuf, buf, buf_size);
	bufpos = 0;
	/* leave room for the entries appended after the loop */
	for (i = 0; (bufpos < buf_size) && (i < UEVENT_NUM_ENVP-2); i++) {
		int keylen;
		char *key;

		key = &msg->envbuf[bufpos];
		keylen = strlen(key);
		msg->envp[i] = key;
		bufpos += keylen + 1;
		dbg("add '%s' to msg.envp[%i]\n", msg->envp[i], i);

		/* remember some keys for further processing */
		if (strncmp(key, "ACTION=", 7) == 0)
			msg->action = &key[7];
		else if (strncmp(key, "DEVPATH=", 8) == 0)
			msg->devpath = &key[8];
		else if (strncmp(key, "SUBSYSTEM=", 10) == 0)
			msg->subsystem = &key[10];
		else if (strncmp(key, "DRIVER=", 7) == 0)
			msg->driver = &key[7];
		else if (strncmp(key, "SEQNUM=", 7) == 0)
			msg->seqnum = strtoull(&key[7], NULL, 10);
		else if (strncmp(key, "DEVPATH_OLD=", 12) == 0)
			msg->devpath_old = &key[12];
		else if (strncmp(key, "PHYSDEVPATH=", 12) == 0)
			msg->physdevpath = &key[12];
		else if (strncmp(key, "PHYSDEVDRIVER=", 14) == 0)
			physdevdriver_key = key;
		else if (strncmp(key, "MAJOR=", 6) == 0)
			maj = strtoull(&key[6], NULL, 10);
		else if (strncmp(key, "MINOR=", 6) == 0)
			min = strtoull(&key[6], NULL, 10);
		else if (strncmp(key, "TIMEOUT=", 8) == 0)
			msg->timeout = strtoull(&key[8], NULL, 10);
	}
	msg->devt = makedev(maj, min);
	msg->envp[i++] = "UDEVD_EVENT=1";

	if (msg->driver == NULL && msg->physdevpath == NULL && physdevdriver_key != NULL) {
		/* for older kernels DRIVER is empty for a bus device, export PHYSDEVDRIVER as DRIVER */
		/* skipping "PHYSDEV" leaves "DRIVER=<value>" */
		msg->envp[i++] = &physdevdriver_key[7];
		msg->driver = &physdevdriver_key[14];
	}

	msg->envp[i] = NULL;

	if (msg->devpath == NULL || msg->action == NULL) {
		info("DEVPATH or ACTION missing, ignore message\n");
		free(msg);
		return NULL;
	}
	return msg;
}

/*
 * Receive one control message from the udevd control socket and act on it.
 *
 * The message is ignored unless it carries SCM_CREDENTIALS with uid 0 and
 * starts with UDEVD_CTRL_MAGIC. Supported messages set or unset an
 * environment variable, stop or start the exec queue, change the log
 * priority or the child limits, or request a reload of the rules.
 */
static void get_ctrl_msg(void)
{
	struct udevd_ctrl_msg ctrl_msg;
	ssize_t size;
	struct msghdr smsg;
	struct cmsghdr *cmsg;
	struct iovec iov;
	struct ucred *cred;
	char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
	int intval;
	char *pos;

	memset(&ctrl_msg, 0x00, sizeof(struct udevd_ctrl_msg));
	iov.iov_base = &ctrl_msg;
	iov.iov_len = sizeof(struct udevd_ctrl_msg);

	memset(&smsg, 0x00, sizeof(struct msghdr));
	smsg.msg_iov = &iov;
	smsg.msg_iovlen = 1;
	smsg.msg_control = cred_msg;
	smsg.msg_controllen = sizeof(cred_msg);

	size = recvmsg(udevd_sock, &smsg, 0);
	if (size <  0) {
		if (errno != EINTR)
			err("unable to receive user udevd message: %s\n", strerror(errno));
		return;
	}

	/* look at the control data only after we know there is some */
	cmsg = CMSG_FIRSTHDR(&smsg);
	if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
		err("no sender credentials received, message ignored\n");
		return;
	}
	cred = (struct ucred *) CMSG_DATA(cmsg);

	if (cred->uid != 0) {
		err("sender uid=%i, message ignored\n", cred->uid);
		return;
	}

	/*
	 * The sender fills the whole struct; make sure the text fields are
	 * terminated before they are used as strings below.
	 */
	ctrl_msg.magic[sizeof(ctrl_msg.magic)-1] = '\0';
	ctrl_msg.buf[sizeof(ctrl_msg.buf)-1] = '\0';

	if (strncmp(ctrl_msg.magic, UDEVD_CTRL_MAGIC, sizeof(UDEVD_CTRL_MAGIC)) != 0 ) {
		err("message magic '%s' doesn't match, ignore it\n", ctrl_msg.magic);
		return;
	}

	/* integer payload; copied out instead of read through a cast pointer */
	memcpy(&intval, ctrl_msg.buf, sizeof(intval));

	switch (ctrl_msg.type) {
	case UDEVD_CTRL_ENV:
		pos = strchr(ctrl_msg.buf, '=');
		if (pos == NULL) {
			err("wrong key format '%s'\n", ctrl_msg.buf);
			break;
		}
		pos[0] = '\0';
		/* "KEY=" without a value removes the variable */
		if (pos[1] == '\0') {
			info("udevd message (ENV) received, unset '%s'\n", ctrl_msg.buf);
			unsetenv(ctrl_msg.buf);
		} else {
			info("udevd message (ENV) received, set '%s=%s'\n", ctrl_msg.buf, &pos[1]);
			setenv(ctrl_msg.buf, &pos[1], 1);
		}
		break;
	case UDEVD_CTRL_STOP_EXEC_QUEUE:
		info("udevd message (STOP_EXEC_QUEUE) received\n");
		stop_exec_q = 1;
		break;
	case UDEVD_CTRL_START_EXEC_QUEUE:
		info("udevd message (START_EXEC_QUEUE) received\n");
		stop_exec_q = 0;
		msg_queue_manager();
		break;
	case UDEVD_CTRL_SET_LOG_LEVEL:
		info("udevd message (SET_LOG_PRIORITY) received, udev_log_priority=%i\n", intval);
		udev_log_priority = intval;
		/* udev_log stays part of the environment, see putenv() */
		snprintf(udev_log, sizeof(udev_log), "UDEV_LOG=%i", udev_log_priority);
		putenv(udev_log);
		break;
	case UDEVD_CTRL_SET_MAX_CHILDS:
		info("udevd message (UDEVD_SET_MAX_CHILDS) received, max_childs=%i\n", intval);
		max_childs = intval;
		break;
	case UDEVD_CTRL_SET_MAX_CHILDS_RUNNING:
		info("udevd message (UDEVD_SET_MAX_CHILDS_RUNNING) received, max_childs=%i\n", intval);
		max_childs_running = intval;
		break;
	case UDEVD_CTRL_RELOAD_RULES:
		info("udevd message (RELOAD_RULES) received\n");
		reload_config = 1;
		break;
	default:
		err("unknown control message type\n");
	}
}

/*
 * Receive one uevent from the netlink socket and turn it into a message.
 *
 * A uevent consists of an "action@devpath" header string followed by the
 * NUL-separated environment. The message is dropped if it was not sent by
 * the kernel, has no payload, has no '@' in its header, or if the ACTION
 * key does not match the action in the header.
 *
 * Returns a malloc()ed message the caller owns, or NULL.
 */
static struct udevd_uevent_msg *get_netlink_msg(void)
{
	struct udevd_uevent_msg *msg;
	int bufpos;
	ssize_t size;
	static char buffer[UEVENT_BUFFER_SIZE+512];
	struct sockaddr_nl snl;
	struct msghdr smsg;
	struct iovec iov;
	char *pos;

	memset(&snl, 0x00, sizeof(snl));
	iov.iov_base = buffer;
	iov.iov_len = sizeof(buffer);

	/* recvmsg() instead of recv() to get the address of the sender */
	memset(&smsg, 0x00, sizeof(smsg));
	smsg.msg_name = &snl;
	smsg.msg_namelen = sizeof(snl);
	smsg.msg_iov = &iov;
	smsg.msg_iovlen = 1;

	size = recvmsg(uevent_netlink_sock, &smsg, 0);
	if (size <  0) {
		if (errno != EINTR)
			err("unable to receive kernel netlink message: %s\n", strerror(errno));
		return NULL;
	}

	/*
	 * Messages from the kernel carry the netlink port id 0; anything else
	 * was sent by a userspace process and must not be trusted as a uevent.
	 */
	if (smsg.msg_namelen < sizeof(snl) || snl.nl_pid != 0) {
		err("netlink message from sender [%u] is not from the kernel, ignored\n",
		    (unsigned int) snl.nl_pid);
		return NULL;
	}

	if ((size_t)size > sizeof(buffer)-1)
		size = sizeof(buffer)-1;
	buffer[size] = '\0';
	dbg("uevent_size=%zi\n", size);

	/* start of event payload */
	bufpos = strlen(buffer)+1;
	if ((ssize_t)bufpos >= size) {
		/* header only; the payload size below would be zero or negative */
		err("invalid uevent '%s'\n", buffer);
		return NULL;
	}
	msg = get_msg_from_envbuf(&buffer[bufpos], size-bufpos);
	if (msg == NULL)
		return NULL;

	/* validate message */
	pos = strchr(buffer, '@');
	if (pos == NULL) {
		err("invalid uevent '%s'\n", buffer);
		free(msg);
		return NULL;
	}
	/* cut the header down to the action */
	pos[0] = '\0';

	if (msg->action == NULL) {
		info("no ACTION in payload found, skip event '%s'\n", buffer);
		free(msg);
		return NULL;
	}

	if (strcmp(msg->action, buffer) != 0) {
		err("ACTION in payload does not match uevent, skip event '%s'\n", buffer);
		free(msg);
		return NULL;
	}

	return msg;
}

/*
 * Signal handler of the daemon: record the signal in a flag and wake up
 * the main loop through the signal pipe.
 *
 * errno is saved and restored, as the write() may change it while the
 * interrupted code is about to look at it.
 */
static void asmlinkage sig_handler(int signum)
{
	int saved_errno = errno;

	switch (signum) {
		case SIGINT:
		case SIGTERM:
			udev_exit = 1;
			break;
		case SIGCHLD:
			/* set flag, then write to pipe if needed */
			sigchilds_waiting = 1;
			break;
		case SIGHUP:
			reload_config = 1;
			break;
	}

	/* write to pipe, which will wakeup select() in our mainloop */
	write(signal_pipe[WRITE_END], "", 1);

	errno = saved_errno;
}

/*
 * An event process has exited: find its message in the running list,
 * store the exit status and delete the message. Pids that do not belong
 * to a running event are ignored.
 */
static void udev_done(int pid, int exitstatus)
{
	struct udevd_uevent_msg *entry;

	list_for_each_entry(entry, &running_list, node) {
		if (entry->pid != pid)
			continue;

		info("seq %llu, pid [%d] exit with %i, %ld seconds old\n", entry->seqnum, entry->pid,
		     exitstatus, time(NULL) - entry->queue_time);
		entry->exitstatus = exitstatus;
		msg_queue_delete(entry);

		/* there may be events waiting with the same devpath */
		run_exec_q = 1;

		/* the entry is gone, the walk must not continue */
		return;
	}
}

/*
 * Collect all exited children without blocking and report each one to
 * udev_done(). A child killed by a signal is reported as signal number
 * plus 128, any other non-exit status as 0.
 */
static void reap_sigchilds(void)
{
	pid_t child;
	int wstatus;

	while ((child = waitpid(-1, &wstatus, WNOHANG)) > 0) {
		int code = 0;

		if (WIFEXITED(wstatus))
			code = WEXITSTATUS(wstatus);
		else if (WIFSIGNALED(wstatus))
			code = WTERMSIG(wstatus) + 128;

		udev_done(child, code);
	}
}

static int init_udevd_socket(void)
{
	struct sockaddr_un saddr;
	socklen_t addrlen;
	const int feature_on = 1;
	int retval;

	memset(&saddr, 0x00, sizeof(saddr));
	saddr.sun_family = AF_LOCAL;
	/* use abstract namespace for socket path */
	strcpy(&saddr.sun_path[1], UDEVD_CTRL_SOCK_PATH);
	addrlen = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(&saddr.sun_path[1]);

	udevd_sock = socket(AF_LOCAL, SOCK_DGRAM, 0);
	if (udevd_sock == -1) {
		err("error getting socket: %s\n", strerror(errno));
		return -1;
	}

	/* the bind takes care of ensuring only one copy running */
	retval = bind(udevd_sock, (struct sockaddr *) &saddr, addrlen);
	if (retval < 0) {
		err("bind failed: %s\n", strerror(errno));
		close(udevd_sock);
		udevd_sock = -1;
		return -1;
	}

	/* enable receiving of the sender credentials */
	setsockopt(udevd_sock, SOL_SOCKET, SO_PASSCRED, &feature_on, sizeof(feature_on));

	return 0;
}

/*
 * Create the NETLINK_KOBJECT_UEVENT socket, request a 16 MB receive
 * buffer and bind to multicast group 1 with our pid as netlink address.
 *
 * Returns 0 on success, -1 on failure with uevent_netlink_sock set to -1.
 */
static int init_uevent_netlink_sock(void)
{
	const int rcvbuf_size = 16 * 1024 * 1024;
	struct sockaddr_nl addr;

	uevent_netlink_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
	if (uevent_netlink_sock == -1) {
		err("error getting socket: %s\n", strerror(errno));
		return -1;
	}

	/* set receive buffersize */
	setsockopt(uevent_netlink_sock, SOL_SOCKET, SO_RCVBUFFORCE, &rcvbuf_size, sizeof(rcvbuf_size));

	memset(&addr, 0x00, sizeof(addr));
	addr.nl_family = AF_NETLINK;
	addr.nl_pid = getpid();
	addr.nl_groups = 1;

	if (bind(uevent_netlink_sock, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
		err("bind failed: %s\n", strerror(errno));
		close(uevent_netlink_sock);
		uevent_netlink_sock = -1;
		return -1;
	}

	return 0;
}

/*
 * Copy the content of <sysfs_path>/kernel/uevent_seqnum to the
 * EVENT_SEQNUM file below udev_root. If it cannot be read, "0\n" is
 * written instead.
 */
static void export_initial_seqnum(void)
{
	char filename[PATH_SIZE];
	int fd;
	char seqnum[32];
	ssize_t len = 0;

	strlcpy(filename, sysfs_path, sizeof(filename));
	strlcat(filename, "/kernel/uevent_seqnum", sizeof(filename));
	fd = open(filename, O_RDONLY);
	if (fd >= 0) {
		len = read(fd, seqnum, sizeof(seqnum)-1);
		close(fd);
	}
	if (len <= 0) {
		strcpy(seqnum, "0\n");
		/* two characters; the terminating NUL does not belong in the file */
		len = 2;
	}
	strlcpy(filename, udev_root, sizeof(filename));
	strlcat(filename, "/" EVENT_SEQNUM, sizeof(filename));
	create_path(filename);
	fd = open(filename, O_WRONLY|O_TRUNC|O_CREAT, 0644);
	if (fd >= 0) {
		write(fd, seqnum, len);
		close(fd);
	}
}

/*
 * udevd entry point.
 *
 * Parses the command line, sets up the control socket, the uevent netlink
 * socket and the signal pipe, loads the rules, optionally forks into the
 * background, and then loops on select() until SIGINT or SIGTERM arrives,
 * dispatching control messages, kernel uevents, signals and rules changes.
 *
 * Exit codes: 0 after a regular shutdown and in the parent of a daemonized
 * child; 1 for option handling (including --help and --version), missing
 * root privileges, pipe setup errors or an already running daemon; 2 for
 * other control socket errors; 3 for netlink socket errors; 4 if the
 * daemonizing fork failed.
 */
int main(int argc, char *argv[], char *envp[])
{
	int retval;
	int fd;
	struct sigaction act;
	fd_set readfds;
	const char *value;
	int daemonize = 0;
	int option;
	static const struct option options[] = {
		{ "daemon", 0, NULL, 'd' },
		{ "debug-trace", 0, NULL, 't' },
		{ "debug", 0, NULL, 'D' },
		{ "help", 0, NULL, 'h' },
		{ "version", 0, NULL, 'V' },
		{}
	};
	int rc = 1;
	int maxfd;

	logging_init("udevd");
	udev_config_init();
	selinux_init();
	dbg("version %s\n", UDEV_VERSION);

	while (1) {
		option = getopt_long(argc, argv, "dDthV", options, NULL);
		if (option == -1)
			break;

		switch (option) {
		case 'd':
			daemonize = 1;
			break;
		case 't':
			debug_trace = 1;
			break;
		case 'D':
			debug = 1;
			/* debug output is only useful with at least info level */
			if (udev_log_priority < LOG_INFO)
				udev_log_priority = LOG_INFO;
			break;
		case 'h':
			printf("Usage: udevd [--help] [--daemon] [--debug-trace] [--debug] [--version]\n");
			goto exit;
		case 'V':
			printf("%s\n", UDEV_VERSION);
			goto exit;
		default:
			goto exit;
		}
	}

	if (getuid() != 0) {
		fprintf(stderr, "root privileges required\n");
		err("root privileges required\n");
		goto exit;
	}

	/* make sure std{in,out,err} fd's are in a sane state */
	fd = open("/dev/null", O_RDWR);
	if (fd < 0) {
		fprintf(stderr, "cannot open /dev/null\n");
		err("cannot open /dev/null\n");
	}
	if (fd > STDIN_FILENO)
		dup2(fd, STDIN_FILENO);
	/* a zero-length write fails only if the descriptor is not usable */
	if (write(STDOUT_FILENO, 0, 0) < 0)
		dup2(fd, STDOUT_FILENO);
	if (write(STDERR_FILENO, 0, 0) < 0)
		dup2(fd, STDERR_FILENO);

	/* init sockets to receive events */
	if (init_udevd_socket() < 0) {
		if (errno == EADDRINUSE) {
			fprintf(stderr, "another udev daemon already running\n");
			err("another udev daemon already running\n");
			rc = 1;
		} else {
			fprintf(stderr, "error initializing udevd socket\n");
			err("error initializing udevd socket\n");
			rc = 2;
		}
		goto exit;
	}

	if (init_uevent_netlink_sock() < 0) {
		fprintf(stderr, "error initializing netlink socket\n");
		err("error initializing netlink socket\n");
		rc = 3;
		goto exit;
	}

	/* setup signal handler pipe */
	retval = pipe(signal_pipe);
	if (retval < 0) {
		err("error getting pipes: %s\n", strerror(errno));
		goto exit;
	}

	/* both ends are non-blocking: neither the handler nor the main loop may hang */
	retval = fcntl(signal_pipe[READ_END], F_GETFL, 0);
	if (retval < 0) {
		err("error fcntl on read pipe: %s\n", strerror(errno));
		goto exit;
	}
	retval = fcntl(signal_pipe[READ_END], F_SETFL, retval | O_NONBLOCK);
	if (retval < 0) {
		err("error fcntl on read pipe: %s\n", strerror(errno));
		goto exit;
	}

	retval = fcntl(signal_pipe[WRITE_END], F_GETFL, 0);
	if (retval < 0) {
		err("error fcntl on write pipe: %s\n", strerror(errno));
		goto exit;
	}
	retval = fcntl(signal_pipe[WRITE_END], F_SETFL, retval | O_NONBLOCK);
	if (retval < 0) {
		err("error fcntl on write pipe: %s\n", strerror(errno));
		goto exit;
	}

	/* parse the rules and keep them in memory */
	sysfs_init();
	udev_rules_init(&rules, 1);

	export_initial_seqnum();

	if (daemonize) {
		pid_t pid;

		pid = fork();
		switch (pid) {
		case 0:
			dbg("daemonized fork running\n");
			break;
		case -1:
			err("fork of daemon failed: %s\n", strerror(errno));
			rc = 4;
			goto exit;
		default:
			dbg("child [%u] running, parent exits\n", pid);
			rc = 0;
			goto exit;
		}
	}

	/* redirect std{out,err} fd's */
	if (!debug)
		dup2(fd, STDOUT_FILENO);
	dup2(fd, STDERR_FILENO);
	if (fd > STDERR_FILENO)
		close(fd);

	/* set scheduling priority for the daemon */
	setpriority(PRIO_PROCESS, 0, UDEVD_PRIORITY);

	chdir("/");
	umask(022);

	/* become session leader */
	sid = setsid();
	dbg("our session is %d\n", sid);

	/* OOM_DISABLE == -17 */
	fd = open("/proc/self/oom_adj", O_RDWR);
	if (fd < 0)
		err("error disabling OOM: %s\n", strerror(errno));
	else {
		write(fd, "-17", 3);
		close(fd);
	}

	/* announce ourselves in the kernel log; 0 is a valid descriptor too */
	fd = open("/dev/kmsg", O_WRONLY);
	if (fd >= 0) {
		const char *str = "<6>udevd version " UDEV_VERSION " started\n";

		write(fd, str, strlen(str));
		close(fd);
	}

	/* set signal handlers */
	memset(&act, 0x00, sizeof(struct sigaction));
	act.sa_handler = (void (*)(int)) sig_handler;
	sigemptyset(&act.sa_mask);
	act.sa_flags = SA_RESTART;
	sigaction(SIGINT, &act, NULL);
	sigaction(SIGTERM, &act, NULL);
	sigaction(SIGCHLD, &act, NULL);
	sigaction(SIGHUP, &act, NULL);

	/* watch rules directory */
	inotify_fd = inotify_init();
	if (inotify_fd >= 0) {
		char filename[PATH_MAX];

		inotify_add_watch(inotify_fd, udev_rules_dir, IN_CREATE | IN_DELETE | IN_MOVE | IN_CLOSE_WRITE);

		/* watch dynamic rules directory */
		strlcpy(filename, udev_root, sizeof(filename));
		strlcat(filename, "/"RULES_DYN_DIR, sizeof(filename));
		inotify_add_watch(inotify_fd, filename, IN_CREATE | IN_DELETE | IN_MOVE | IN_CLOSE_WRITE);
	} else if (errno == ENOSYS)
		err("the kernel does not support inotify, udevd can't monitor rules file changes\n");
	else
		err("inotify_init failed: %s\n", strerror(errno));

	/* maximum limit of forked childs */
	value = getenv("UDEVD_MAX_CHILDS");
	if (value)
		max_childs = strtoul(value, NULL, 10);
	else {
		/* scale with the amount of memory */
		int memsize = mem_size_mb();
		if (memsize > 0)
			max_childs = 128 + (memsize / 4);
		else
			max_childs = UDEVD_MAX_CHILDS;
	}
	info("initialize max_childs to %u\n", max_childs);

	/* start to throttle forking if maximum number of _running_ childs is reached */
	value = getenv("UDEVD_MAX_CHILDS_RUNNING");
	if (value)
		max_childs_running = strtoull(value, NULL, 10);
	else {
		/* scale with the number of cpus */
		int cpus = cpu_count();
		if (cpus > 0)
			max_childs_running = 8 + (8 * cpus);
		else
			max_childs_running = UDEVD_MAX_CHILDS_RUNNING;
	}
	info("initialize max_childs_running to %u\n", max_childs_running);

	/* clear environment for forked event processes */
	clearenv();

	/* export log_priority , as called programs may want to follow that setting */
	sprintf(udev_log, "UDEV_LOG=%i", udev_log_priority);
	putenv(udev_log);
	if (debug_trace)
		putenv("DEBUG=1");

	maxfd = udevd_sock;
	maxfd = UDEV_MAX(maxfd, uevent_netlink_sock);
	maxfd = UDEV_MAX(maxfd, signal_pipe[READ_END]);
	maxfd = UDEV_MAX(maxfd, inotify_fd);

	while (!udev_exit) {
		struct udevd_uevent_msg *msg;
		int fdcount;

		FD_ZERO(&readfds);
		FD_SET(signal_pipe[READ_END], &readfds);
		FD_SET(udevd_sock, &readfds);
		FD_SET(uevent_netlink_sock, &readfds);
		if (inotify_fd >= 0)
			FD_SET(inotify_fd, &readfds);

		fdcount = select(maxfd+1, &readfds, NULL, NULL, NULL);
		if (fdcount < 0) {
			if (errno != EINTR)
				err("error in select: %s\n", strerror(errno));
			continue;
		}

		/* get control message */
		if (FD_ISSET(udevd_sock, &readfds))
			get_ctrl_msg();

		/* get netlink message */
		if (FD_ISSET(uevent_netlink_sock, &readfds)) {
			msg = get_netlink_msg();
			if (msg)
				msg_queue_insert(msg);
		}

		/* received a signal, clear our notification pipe */
		if (FD_ISSET(signal_pipe[READ_END], &readfds)) {
			char buf[256];

			read(signal_pipe[READ_END], &buf, sizeof(buf));
		}

		/* rules directory inotify watch */
		if ((inotify_fd >= 0) && FD_ISSET(inotify_fd, &readfds)) {
			int nbytes;

			/* discard all possible events, we can just reload the config */
			if ((ioctl(inotify_fd, FIONREAD, &nbytes) == 0) && nbytes > 0) {
				char *buf;

				reload_config = 1;
				buf = malloc(nbytes);
				if (buf == NULL) {
					err("error getting buffer for inotify, disable watching\n");
					close(inotify_fd);
					inotify_fd = -1;
				} else {
					/* drain the queue, the content is of no interest */
					read(inotify_fd, buf, nbytes);
					free(buf);
				}
			}
		}

		/* rules changed, set by inotify or a HUP signal */
		if (reload_config) {
			reload_config = 0;
			udev_rules_cleanup(&rules);
			udev_rules_init(&rules, 1);
		}

		/* forked child has returned */
		if (sigchilds_waiting) {
			sigchilds_waiting = 0;
			reap_sigchilds();
		}

		if (run_exec_q) {
			run_exec_q = 0;
			if (!stop_exec_q)
				msg_queue_manager();
		}
	}
	rc = 0;

exit:
	udev_rules_cleanup(&rules);
	sysfs_cleanup();
	selinux_exit();

	if (signal_pipe[READ_END] >= 0)
		close(signal_pipe[READ_END]);
	if (signal_pipe[WRITE_END] >= 0)
		close(signal_pipe[WRITE_END]);

	if (udevd_sock >= 0)
		close(udevd_sock);
	if (inotify_fd >= 0)
		close(inotify_fd);
	if (uevent_netlink_sock >= 0)
		close(uevent_netlink_sock);

	logging_close();

	return rc;
}
