/* -*-pgsql-c-*- */
/*
 * $Header: /home/t-ishii/repository/pgpool/main.c,v 1.25 2005/01/05 04:15:20 t-ishii Exp $
 *
 * pgpool: a language independent connection pool server for PostgreSQL 
 * written by Tatsuo Ishii
 *
 * Copyright (c) 2003-2005	Tatsuo Ishii
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice appear in all
 * copies and that both that copyright notice and this permission
 * notice appear in supporting documentation, and that the name of the
 * author not be used in advertising or publicity pertaining to
 * distribution of the software without specific, written prior
 * permission. The author makes no representations about the
 * suitability of this software for any purpose.  It is provided "as
 * is" without express or implied warranty.
 */
#include "pool.h"

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/un.h>
#include <netdb.h>
#include <arpa/inet.h>

#include <sys/stat.h>
#include <fcntl.h>

#include <sys/wait.h>

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#include <signal.h>

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#include "version.h"

/* backlog value passed to listen() for both the UNIX and INET listening sockets */
#define PGPOOLMAXLITSENQUEUELENGTH 10000
/* process set up: daemonizing, pid file handling, sockets, children */
static void daemonize(void);
static int read_pid_file(void);
static void write_pid_file(void);
static pid_t fork_a_child(int unix_fd, int inet_fd);
static int create_unix_domain_socket(void);
static int create_inet_domain_socket(const char *hostname);
static void myexit(int code);

/* signal handlers installed by main() */
static RETSIGTYPE exit_handler(int sig);
static RETSIGTYPE reap_handler(int sig);
static RETSIGTYPE failover_handler(int sig);

static void usage(void);
static void stop_me(void);

/*
 * UNIX domain socket address. main() fills in sun_path; the path is
 * used when binding the socket and is unlinked again in myexit().
 */
struct sockaddr_un un_addr;

/* child pid table (pool_config.num_init_children entries, allocated in main) */
pid_t *pids;

/* listening sockets handed to every child */
int unix_fd;
int inet_fd;	/* only assigned when pool_config.listen_addresses is not empty */

int exiting = 0;	/* set to 1 by exit_handler(); other handlers then return early */
int switching = 0;	/* set to 1 while failover_handler() is restarting the children */

int not_detach = 0;	/* set by -n: do not daemonize */
int debug = 0;		/* set by -d */

/* pid recorded by main(); handlers and myexit() compare it with getpid() */
pid_t mypid;

long int weight_master;	/* normalized weight of master (0-RAND_MAX range) */

/* signal sent by stop_me(); selected with the -m option */
static int stop_sig = SIGTERM;

static int degenerated = 0;	/* set non 0 if already degenerated */

/*
 * pgpool main program
 *
 * Parses the command line (-f config file, -h help, -m shutdown mode,
 * -n do not detach, -d debug), loads the configuration and then either
 * stops a running pgpool ("stop" argument) or starts a new one:
 * writes the pid file (daemonizing first unless -n was given), creates
 * the listening sockets, forks pool_config.num_init_children children,
 * installs the signal handlers and finally waits for signals (or runs
 * the periodic health check when built with HEALTH_CHECK).
 */
int main(int argc, char **argv)
{
	int opt;
	char conf_file[POOLMAXPATHLEN+1];
	int i;
	int pid;

	/* default configuration file path; may be overridden by -f */
	snprintf(conf_file, sizeof(conf_file), "%s/%s", DEFAULT_CONFIGDIR, POOL_CONF_FILE_NAME);

	while ((opt = getopt(argc, argv, "f:hm:nd")) != -1)
	{
		switch (opt)
		{
			case 'f':
				if (!optarg)
				{
					usage();
					exit(1);
				}
				/*
				 * snprintf always NUL-terminates the destination;
				 * strncpy would leave conf_file unterminated when
				 * the argument fills the whole buffer.
				 */
				snprintf(conf_file, sizeof(conf_file), "%s", optarg);
				break;
			case 'h':
				usage();
				exit(0);
				break;
			case 'm':
				if (!optarg)
				{
					usage();
					exit(1);
				}
				/* only the first character is significant: s, f or i */
				if (*optarg == 's' || !strcmp("smart", optarg))
					stop_sig = SIGTERM;		/* smart shutdown */
				else if (*optarg == 'f' || !strcmp("fast", optarg))
					stop_sig = SIGINT;		/* fast shutdown */
				else if (*optarg == 'i' || !strcmp("immediate", optarg))
					stop_sig = SIGQUIT;		/* immediate shutdown */
				else
				{
					usage();
					exit(1);
				}
				break;
				
			case 'n':
				not_detach = 1;
				break;
			case 'd':
				debug = 1;
				break;
			default:
				usage();
				exit(1);
		}
	}

	if (pool_get_config(conf_file))
	{
		pool_error("Unable to get configuration. Exiting...");
		/* myexit() only acts when getpid() matches mypid, so set it first */
		mypid = getpid();
		myexit(1);
	}

	/* set current PostgreSQL backend */
	pool_config.current_backend_host_name = pool_config.backend_host_name;
	pool_config.current_backend_port = pool_config.backend_port;

	/* set load balance weight */
	weight_master = (RAND_MAX) * (pool_config.weight_master /
						   (pool_config.weight_master + pool_config.weight_secondary));
	pool_debug("weight: %ld", weight_master);

	if (optind == (argc - 1) && !strcmp(argv[optind], "stop"))
	{
		/* "pgpool stop": signal the running instance and wait for it */
		stop_me();
		exit(0);
	}
	else if (optind == argc)
	{
		/* normal start: refuse to run if the pid file names a live process */
		pid = read_pid_file();
		if (pid > 0)
		{
			if (kill(pid, 0) == 0)
			{
				fprintf(stderr, "pid file found. is another pgpool(%d) is running?\n", pid);
				exit(1);
			}
			else
				fprintf(stderr, "pid file found but it seems bogus. Trying to start pgpool anyway...\n");
		}
	}
	else if (optind < argc)
	{
		/* unexpected extra arguments */
		usage();
		exit(1);
	}


	/* set signal masks */
	poolinitmask();

	/* daemonize() writes the pid file itself, after forking */
	if (not_detach)
		write_pid_file();
	else
		daemonize();

	mypid = getpid();

	/* set unix domain socket path */
	snprintf(un_addr.sun_path, sizeof(un_addr.sun_path), "%s/.s.PGSQL.%d",
			 pool_config.socket_dir,
			 pool_config.port);

	/* set up signal handlers */
	pool_signal(SIGPIPE, SIG_IGN);

	/* create unix domain socket */
	unix_fd = create_unix_domain_socket();

	/* create inet domain socket if any */
	if (pool_config.listen_addresses[0])
	{
		inet_fd = create_inet_domain_socket(pool_config.listen_addresses);
	}

	/* size the table by its element type (pid_t), not by the counter's type */
	pids = malloc(pool_config.num_init_children * sizeof(*pids));
	if (pids == NULL)
	{
		pool_error("failed to allocate pids");
		myexit(1);
	}
	memset(pids, 0, pool_config.num_init_children * sizeof(*pids));

	/* fork the children */
	for (i=0;i<pool_config.num_init_children;i++)
	{
		pids[i] = fork_a_child(unix_fd, inet_fd);
	}

	/*
	 * set up signal handlers. This happens after the initial fork so
	 * the first generation of children does not inherit them.
	 */
	pool_signal(SIGTERM, exit_handler);
	pool_signal(SIGINT, exit_handler);
	pool_signal(SIGQUIT, exit_handler);
	pool_signal(SIGCHLD, reap_handler);
	pool_signal(SIGUSR1, failover_handler);
	pool_signal(SIGUSR2, failover_handler);

	for (;;)
	{
#ifdef HEALTH_CHECK
		int sts;

		sleep(5);

		if (!degenerated)
		{
			/* -1 and -2 are mapped to the SIGUSR1 and SIGUSR2 handling respectively */
			sts = health_check();
			if (sts == -1)
			{
				failover_handler(SIGUSR1);
			}
			else if (sts == -2)
			{
				failover_handler(SIGUSR2);
			}
		}
#else
		/* all further work is done in the signal handlers */
		pause();
#endif
	}
	return 0;
}

/*
 * print version and command line help to stderr
 */
static void usage(void)
{
	/* one call; adjacent string literals are joined by the compiler */
	fprintf(stderr,
			"pgpool version %s(%s),\n"
			"  a generic connection pool/replication/load balance server for PostgreSQL\n\n"
			"usage: pgpool [-f config_file][-n][-d][-h][[-m {s[mart]|f[ast]|i[mmediate]}] stop]\n"
			"  config_file default path: %s/%s\n"
			"  -n: don't run in daemon mode. does not detatch control tty\n"
			"  -d: debug mode. lots of debug information will be printed\n"
			"  stop: stop pgpool\n"
			"  -h: print this help\n",
			VERSION, PGPOOLVERSION,
			DEFAULT_CONFIGDIR, POOL_CONF_FILE_NAME);
}

/*
 * detach from the control tty and continue in the background
 *
 * Forks; the parent exits with status 0 and the child carries on. When
 * HAVE_SETSID is defined the child starts a new session. stdin, stdout
 * and stderr are then redirected to /dev/null and the pid file is
 * written with the child's pid.
 */
static void daemonize(void)
{
	int			devnull;
	pid_t		pid;

	pid = fork();
	if (pid == (pid_t) -1)
	{
		pool_error("fork() failed. reason: %s", strerror(errno));
		exit(1);
		return;					/* not reached */
	}
	else if (pid > 0)
	{			/* parent */
		exit(0);
	}

#ifdef HAVE_SETSID
	if (setsid() < 0)
	{
		pool_error("setsid() failed. reason:%s", strerror(errno));
		exit(1);
	}
#endif

	devnull = open("/dev/null", O_RDWR);
	if (devnull < 0)
	{
		/* keep going with the inherited descriptors, but say why */
		pool_error("could not open /dev/null. reason: %s", strerror(errno));
	}
	else
	{
		dup2(devnull, 0);
		dup2(devnull, 1);
		dup2(devnull, 2);

		/*
		 * If a standard descriptor was closed on entry, open() may have
		 * returned 0, 1 or 2; closing it would undo the redirection.
		 */
		if (devnull > 2)
			close(devnull);
	}

	write_pid_file();
}

/*
 * stop a running pgpool
 *
 * Reads the pid from the pid file under pool_config.logdir, sends it
 * stop_sig (selected with -m) and then polls once per second with
 * kill(pid, 0) until the process is gone. Exits with status 1 if the
 * pid file cannot be opened, does not contain a positive pid, or the
 * signal cannot be delivered.
 */
static void stop_me(void)
{
	FILE *fd;
	char path[POOLMAXPATHLEN];
	char pidbuf[128];
	char *endp;
	long val;
	size_t nread;
	pid_t pid;

	snprintf(path, sizeof(path), "%s/%s", pool_config.logdir, PID_FILE_NAME);
	fd = fopen(path, "r");
	if (!fd)
	{
		pool_error("could not open pid file as %s. reason: %s",
				   path, strerror(errno));
		exit(1);
	}

	/* leave room for the terminator so the buffer is always a string */
	nread = fread(pidbuf, 1, sizeof(pidbuf) - 1, fd);
	pidbuf[nread] = '\0';
	fclose(fd);

	/*
	 * Reject anything that is not a positive pid. In particular a pid
	 * of 0 would make kill() signal every process in our own process
	 * group, and a negative one would address a process group.
	 */
	errno = 0;
	val = strtol(pidbuf, &endp, 10);
	if (endp == pidbuf || errno == ERANGE || val <= 0 ||
		(long) (pid_t) val != val)
	{
		pool_error("invalid pid in pid file %s", path);
		exit(1);
	}
	pid = (pid_t) val;

	if (kill(pid, stop_sig) == -1)
	{
		pool_error("could not stop pid: %d. reason: %s", (int) pid, strerror(errno));
		exit(1);
	}

	fprintf(stderr, "stop request sent to pgpool. waiting for termination...");

	/* signal 0 only tests for the existence of the process */
	while (kill(pid, 0) == 0)
	{
		fprintf(stderr, ".");
		sleep(1);
	}
	fprintf(stderr, "done.\n");
}

/*
 * read the pid file
 *
 * Returns the number stored in the pid file under pool_config.logdir
 * as converted by atoi(), or -1 if the file cannot be opened or
 * nothing could be read from it.
 */
static int read_pid_file(void)
{
	FILE *fd;
	char path[POOLMAXPATHLEN];
	char pidbuf[128];
	size_t nread;

	snprintf(path, sizeof(path), "%s/%s", pool_config.logdir, PID_FILE_NAME);
	fd = fopen(path, "r");
	if (!fd)
	{
		return -1;
	}

	/* keep one byte free for the terminator */
	nread = fread(pidbuf, 1, sizeof(pidbuf) - 1, fd);
	if (nread == 0)
	{
		pool_error("could not read pid file as %s. reason: %s",
				   path, strerror(errno));
		fclose(fd);
		return -1;
	}
	/* fread() does not terminate the buffer; atoi() needs a string */
	pidbuf[nread] = '\0';
	fclose(fd);
	return(atoi(pidbuf));
}

/*
 * write the pid file
 *
 * Stores the decimal pid of the calling process in the pid file under
 * pool_config.logdir. Exits with status 1 if the file cannot be opened
 * or closing it reports an error.
 */
static void write_pid_file(void)
{
	char pidfile[POOLMAXPATHLEN];
	char text[128];
	FILE *fp;

	snprintf(pidfile, sizeof(pidfile), "%s/%s", pool_config.logdir, PID_FILE_NAME);

	fp = fopen(pidfile, "w");
	if (fp == NULL)
	{
		pool_error("could not open pid file as %s. reason: %s",
				   pidfile, strerror(errno));
		exit(1);
	}

	snprintf(text, sizeof(text), "%d", (int)getpid());
	fwrite(text, strlen(text), 1, fp);

	/* buffered output is flushed here, so this is where write errors show up */
	if (fclose(fp) != 0)
	{
		pool_error("could not write pid file as %s. reason: %s",
				   pidfile, strerror(errno));
		exit(1);
	}
}

/*
 * fork a child
 *
 * The child unblocks signals and enters do_child() with the two
 * listening sockets. In the parent the child's pid is returned; a
 * failed fork() is logged and handed to myexit(1).
 */
pid_t fork_a_child(int unix_fd, int inet_fd)
{
	pid_t child = fork();

	switch (child)
	{
		case -1:
			pool_error("fork() failed. reason: %s", strerror(errno));
			myexit(1);
			break;

		case 0:
			/* call child main */
			POOL_SETMASK(&UnBlockSig);
			do_child(unix_fd, inet_fd);
			break;

		default:
			/* parent: nothing more to do here */
			break;
	}

	return child;
}

/*
 * create inet domain socket
 *
 * Creates a TCP socket with SO_REUSEADDR set, binds it to
 * pool_config.port on the given host ("*" means INADDR_ANY, anything
 * else is resolved with gethostbyname) and starts listening. Returns
 * the listening descriptor; every failure is logged and passed to
 * myexit(1).
 */
static int create_inet_domain_socket(const char *hostname)
{
	struct sockaddr_in sin;
	int sock;
	int reuse = 1;

	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock == -1)
	{
		pool_error("Failed to create INET domain socket. reason: %s", strerror(errno));
		myexit(1);
	}

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
				   (char *) &reuse, sizeof(reuse)) == -1)
	{
		pool_error("setsockopt() failed. reason: %s", strerror(errno));
		myexit(1);
	}

	/* build the address to bind to */
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(pool_config.port);

	if (strcmp(hostname, "*") != 0)
	{
		struct hostent *he = gethostbyname(hostname);

		if (he == NULL)
		{
			pool_error("could not resolve host name \"%s\": %s", hostname, hstrerror(h_errno));
			myexit(1);
		}
		/* use the first address returned for the host */
		sin.sin_addr = *(struct in_addr *) he->h_addr;
	}
	else
	{
		sin.sin_addr.s_addr = htonl(INADDR_ANY);
	}

	if (bind(sock, (struct sockaddr *) &sin, sizeof(struct sockaddr_in)) == -1)
	{
		pool_error("bind() failed. reason: %s", strerror(errno));
		myexit(1);
	}

	if (listen(sock, PGPOOLMAXLITSENQUEUELENGTH) < 0)
	{
		pool_error("listen() failed. reason: %s", strerror(errno));
		myexit(1);
	}

	return sock;
}

/*
 * create UNIX domain socket
 *
 * Creates a stream socket, binds it to the path stored in
 * un_addr.sun_path, makes the socket file accessible to everyone
 * (mode 0777) and starts listening. Returns the listening descriptor;
 * every failure is logged and passed to myexit(1).
 */
static int create_unix_domain_socket(void)
{
	struct sockaddr_un addr;
	int fd;
	int status;
	int len;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd == -1)
	{
		pool_error("Failed to create UNIX domain socket. reason: %s", strerror(errno));
		myexit(1);
	}
	memset((char *) &addr, 0, sizeof(addr));
	((struct sockaddr *)&addr)->sa_family = AF_UNIX;
	/*
	 * The path comes from the configuration (socket_dir), so it must be
	 * copied as data and never be interpreted as a format string.
	 */
	snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", un_addr.sun_path);
	len = sizeof(struct sockaddr_un);
	status = bind(fd, (struct sockaddr *)&addr, len);
	if (status == -1)
	{
		pool_error("bind() failed. reason: %s", strerror(errno));
		myexit(1);
	}

	if (chmod(un_addr.sun_path, 0777) == -1)
	{
		pool_error("chmod() failed. reason: %s", strerror(errno));
		myexit(1);
	}

	status = listen(fd, PGPOOLMAXLITSENQUEUELENGTH);
	if (status < 0)
	{
		pool_error("listen() failed. reason: %s", strerror(errno));
		myexit(1);
	}
	return fd;
}

/*
 * clean up and exit
 *
 * Only acts in the process whose pid equals mypid: removes the UNIX
 * domain socket file and the pid file, then exits with the given code.
 * In any other process it returns without doing anything.
 */
static void myexit(int code)
{
	char pidfile[POOLMAXPATHLEN];

	if (getpid() == mypid)
	{
		unlink(un_addr.sun_path);

		snprintf(pidfile, sizeof(pidfile), "%s/%s", pool_config.logdir, PID_FILE_NAME);
		unlink(pidfile);

		exit(code);
	}
}

/*
 * notice backend connection error using SIGUSR1 or SIGUSR2
 *
 * Signals the parent process: SIGUSR1 when master is non zero,
 * SIGUSR2 otherwise.
 */
void notice_backend_error(int master)
{
	pid_t ppid = getppid();
	int signo = master ? SIGUSR1 : SIGUSR2;

	pool_log("notice_backend_error: master: %d fail over request from pid %d", master, getpid());

	kill(ppid, signo);

	/* avoid race condition with SIGCHLD */
#ifdef NOT_USED
	sleep(1);
#endif
}

static RETSIGTYPE exit_handler(int sig)
{
	int i;

	POOL_SETMASK(&BlockSig);

	/*
	 * this could happend in a child process if a signal has been sent
	 * before resetting signal handler
	 */
	if (getpid() != mypid)
	{
		pool_debug("exit_handler: I am not parent");
		POOL_SETMASK(&UnBlockSig);
		exit(0);
	}

	pool_debug("exit_handler called");

	exiting = 1;

	for (i = 0; i < pool_config.num_init_children; i++)
	{
		pid_t pid = pids[i];
		if (pid)
		{
			kill(pid, sig);
		}
	}
	while (wait(NULL) > 0)
		;

	if (errno != ECHILD)
		pool_error("wait() failed. reason:%s", strerror(errno));

	POOL_SETMASK(&UnBlockSig);

	myexit(0);
}


/*
 * handle SIGUSR1/SIGUSR2 (backend connection error, fail over request, if possible)
 *
 * if sig == SIGUSR1, we assume that the master has been down.
 * if sig == SIGUSR2, we assume that the secondary has been down.
 *
 * All children are killed with SIGQUIT and reaped, the backend
 * settings in pool_config are adjusted, and a fresh set of children
 * is forked. Nothing is done when called outside the parent process,
 * while exiting, or while another switch is already in progress.
 */
static RETSIGTYPE failover_handler(int sig)
{
	int i;
	int replication = 0;	/* remembers that we started out in replication mode */

	POOL_SETMASK(&BlockSig);

	pool_debug("failover_handler called");

	/*
	 * this could happen in a child process if a signal has been sent
	 * before resetting signal handler
	 */
	if (getpid() != mypid)
	{
		pool_debug("failover_handler: I am not parent");
		POOL_SETMASK(&UnBlockSig);
		return;
	}

	if (exiting)
	{
		POOL_SETMASK(&UnBlockSig);
		return;
	}

	if (switching)
	{
		POOL_SETMASK(&UnBlockSig);
		return;
	}

#ifdef NOT_USED
	/* secondary backend exists? */
	if (pool_config.secondary_backend_port == 0)
		return;
#endif

	/* 
	 * if not in replication mode, we treat this a restart request.
	 * otherwise we need to check if we have already failovered.
	 */
	if (!pool_config.replication_enabled ||
		strcmp(pool_config.current_backend_host_name, pool_config.secondary_backend_host_name) ||
		pool_config.current_backend_port != pool_config.secondary_backend_port)
	{
		/* reap_handler and nested fail over requests back off while this is set */
		switching = 1;
		
		if (pool_config.replication_enabled)
		{
			replication = 1;
			degenerated = 1;

			if (sig == SIGUSR2)
			{
				pool_log("starting degeneration. shutdown secondary host %s(%d)",
						 pool_config.secondary_backend_host_name,
						 pool_config.secondary_backend_port);
			}
			else
			{
				pool_log("starting degeneration. shutdown master host %s(%d)",
						 pool_config.backend_host_name,
						 pool_config.backend_port);
			}
		}
		else if (!degenerated && pool_config.secondary_backend_port != 0)
		{
			pool_log("starting failover from %s(%d) to %s(%d)",
					   pool_config.current_backend_host_name,
					   pool_config.current_backend_port,
					   pool_config.secondary_backend_host_name,
					   pool_config.secondary_backend_port);
		}
		else
		{
			pool_log("restarting pgpool");
		}

		/* kill all children */
		for (i = 0; i < pool_config.num_init_children; i++)
		{
			pid_t pid = pids[i];
			if (pid)
			{
				kill(pid, SIGQUIT);
				pool_debug("kill %d", pid);
			}
		}

		/* reap them all; ECHILD means no child is left */
		while (wait(NULL) > 0)
			;

		if (errno != ECHILD)
			pool_error("wait() failed. reason:%s", strerror(errno));

		if (pool_config.replication_enabled)
		{
			/* disable replication mode */
			pool_config.replication_enabled = 0;

			/* master is down: continue on the secondary alone */
			if (sig == SIGUSR1)
			{
				pool_config.current_backend_host_name = pool_config.secondary_backend_host_name;
				pool_config.current_backend_port = pool_config.secondary_backend_port;
			}
		}
		else if (!degenerated && pool_config.secondary_backend_port != 0)
		{
			/* fail over to secondary */
			pool_config.current_backend_host_name = pool_config.secondary_backend_host_name;
			pool_config.current_backend_port = pool_config.secondary_backend_port;
		}

		/* fork the children */
		for (i=0;i<pool_config.num_init_children;i++)
		{
			pids[i] = fork_a_child(unix_fd, inet_fd);
		}

		/*
		 * do not close unix_fd and inet_fd here. if a child dies we
		 * need to fork a new child which should inherit these fds.
		 */

		/*
		 * replication_enabled has been cleared above, so the local
		 * flag is what tells us which completion message applies.
		 */
		if (replication)
		{
			if (sig == SIGUSR2)
			{
				pool_log("degeneration done. shutdown secondary host %s(%d)",
						 pool_config.secondary_backend_host_name,
						 pool_config.secondary_backend_port);
			}
			else
			{
				pool_log("degeneration done. shutdown master host %s(%d)",
						 pool_config.backend_host_name,
						 pool_config.backend_port);
			}
		}
		else if (!degenerated && pool_config.secondary_backend_port != 0)
		{
			pool_log("failover from %s(%d) to %s(%d) done.",
					   pool_config.backend_host_name,
					   pool_config.backend_port,
					   pool_config.secondary_backend_host_name,
					   pool_config.secondary_backend_port);
		}
		else
		{
			pool_log("restarting pgpool done.");
		}

		switching = 0;
	}
	POOL_SETMASK(&UnBlockSig);
}

/*
 * handle SIGCHLD
 *
 * Collects every terminated child without blocking. A child that is
 * found in the pid table and ended with a non zero wait status is
 * replaced by a newly forked one; a child that ended with status 0 is
 * not replaced and its slot is cleared. Nothing is done while exiting
 * or while a fail over is in progress (those paths reap the children
 * themselves).
 */
static RETSIGTYPE reap_handler(int sig)
{
	pid_t pid;
	int status;
	int i;

	POOL_SETMASK(&BlockSig);

	pool_debug("reap_handler called");

	if (exiting)
	{
		POOL_SETMASK(&UnBlockSig);
		return;
	}

	if (switching)
	{
		POOL_SETMASK(&UnBlockSig);
		return;
	}

	for (;;)
	{
#ifdef HAVE_WAITPID
		pid = waitpid(-1, &status, WNOHANG);
#else
		pid = wait3(&status, WNOHANG, NULL);
#endif
		/* 0: children exist but none has ended; -1: error or no children */
		if (pid <= 0)
			break;

		pool_debug("child %d exits with status %d by signal %d", pid, status, WTERMSIG(status));

		/* look for exiting child's pid */
		for (i=0;i<pool_config.num_init_children;i++)
		{
			if (pid != pids[i])
				continue;

			if (!switching && !exiting && status)
			{
				/* if found, fork a new child */
				pids[i] = fork_a_child(unix_fd, inet_fd);
				pool_debug("fork a new child pid %d", pids[i]);
			}
			else
			{
				/*
				 * The child is gone and is not replaced. Clear the
				 * slot so a later kill() over the table cannot hit an
				 * unrelated process that reused this pid.
				 */
				pids[i] = 0;
			}
			break;
		}
	}

	POOL_SETMASK(&UnBlockSig);
}
