/*
 * ufdbGuard is copyrighted (C) 2005,2006,2007 by URLfilterDB with all rights reserved.
 */

#include "ufdb.h"
#include "ufdblib.h"

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <signal.h>
#include <errno.h>
#include <pthread.h>
#if HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <pwd.h>

/*
 * Process-wide settings and counters with their start-up values.
 * NOTE(review): most of these are read and changed by code outside this part
 * of the file, so their exact meaning should be confirmed there.  Within this
 * part only UFDBglobalDebug is used: a non-zero value makes
 * UFDBdropPrivileges() log a confirmation message.
 */
int    UFDBglobalDebug = 0;
char   UFDBglobalUserName[31+1] = "";
char * UFDBglobalLogDir = NULL;
int    UFDBglobalLogBlock = 0;
int    UFDBglobalLogAllRequests = 0;
int    UFDBglobalSafeSearch = 1;
int    UFDBglobalHttpsWithHostname = 0;
int    UFDBglobalHttpsOfficialCertificate = 0;
int    UFDBglobalHttpdPort = 0;
char   UFDBglobalHttpdInterface[256] = "all";
char   UFDBglobalHttpdImagesDirectory[256] = ".";
unsigned long UFDBglobalMaxLogfileSize = 200 * 1024 * 1024;	/* 200 MB */
volatile unsigned long UFDBglobalTunnelCounter = 0;

/*
 * CPU binding state: ufdbSetCPU() fills in num_cpus and cpu_mask,
 * ufdbResetCPUs() clears cpu_mask and ufdbSetThreadCPUaffinity() hands
 * cpu_mask to the kernel.  UFDB_MAX_CPUS is the CPU count that ufdbSetCPU()
 * assumes when it cannot ask the system.
 */
static long          num_cpus;
static unsigned long cpu_mask = 0UL;
#define UFDB_MAX_CPUS 32

/*
 * ufdb_malloc_mutex exists in UFDB_DEBUG builds only; it is locked by
 * ufdbGetMallocMutex() and unlocked by ufdbReleaseMallocMutex().
 */
#ifdef UFDB_DEBUG
pthread_mutex_t ufdb_malloc_mutex = UFDB_STATIC_MUTEX_INIT;
#endif
/* Serialises the regexec() call in UFDBregexec() when NEED_REGEXEC_MUTEX is 1. */
pthread_mutex_t ufdb_regexec_mutex = UFDB_STATIC_MUTEX_INIT;

/*
 *  Some libraries call malloc() without any locking of their own, so calls
 *  into those libraries are bracketed by ufdbGetMallocMutex() and
 *  ufdbReleaseMallocMutex().
 *
 *  ufdbGetMallocMutex() locks ufdb_malloc_mutex, but only in UFDB_DEBUG builds
 *  where UFDB_MALLOC_IS_THREADSAFE is not defined; in every other build it
 *  does nothing.  A failed lock is logged together with fn, the name of the
 *  calling function.
 */
void ufdbGetMallocMutex( char * fn )
{
#if defined(UFDB_DEBUG)  &&  !defined(UFDB_MALLOC_IS_THREADSAFE)
   int status = pthread_mutex_lock( &ufdb_malloc_mutex );

   if (status != 0)
      ufdbLogError( "ufdbGetMallocMutex: mutex_lock failed with code %d in %s", status, fn );
#else
   (void) fn;		/* nothing to lock in this build */
#endif
}

/*
 *  Counterpart of ufdbGetMallocMutex(): unlocks ufdb_malloc_mutex in
 *  UFDB_DEBUG builds where UFDB_MALLOC_IS_THREADSAFE is not defined and does
 *  nothing in every other build.  A failed unlock is logged together with fn,
 *  the name of the calling function.
 */
void ufdbReleaseMallocMutex( char * fn )
{
#if defined(UFDB_DEBUG)  &&  !defined(UFDB_MALLOC_IS_THREADSAFE)
   int status = pthread_mutex_unlock( &ufdb_malloc_mutex );

   if (status != 0)
      ufdbLogError( "ufdbReleaseMallocMutex: mutex_unlock failed with code %d in %s", status, fn );
#else
   (void) fn;		/* nothing to unlock in this build */
#endif
}


/*
 * malloc() wrapper: allocates elsize bytes between ufdbGetMallocMutex() and
 * ufdbReleaseMallocMutex().
 * Returns whatever malloc() returned, so NULL when the allocation fails.
 */
void * ufdbMalloc( size_t elsize )
{
   void * block;

   ufdbGetMallocMutex( "ufdbMalloc" );
   block = malloc( elsize );
   ufdbReleaseMallocMutex( "ufdbMalloc" );

   return block;
}


/*
 * calloc() wrapper: allocates zero-filled room for n elements of num bytes
 * each between ufdbGetMallocMutex() and ufdbReleaseMallocMutex().
 * Returns whatever calloc() returned, so NULL when the allocation fails.
 */
void * ufdbCalloc( size_t n, size_t num )
{
   void * p;

   /* the name handed to the mutex helpers only shows up in their error
    * messages, so it must name this function and not ufdbMalloc */
   ufdbGetMallocMutex( "ufdbCalloc" );
   p = calloc( n, num );
   ufdbReleaseMallocMutex( "ufdbCalloc" );

   return p;
}



/*
 * realloc() wrapper: resizes ptr to elsize bytes between ufdbGetMallocMutex()
 * and ufdbReleaseMallocMutex().
 * Returns whatever realloc() returned; when that is NULL because the request
 * failed, the block at ptr has not been released and still belongs to the
 * caller.
 */
void * ufdbRealloc( void * ptr, size_t elsize )
{
   void * p;

   /* the name handed to the mutex helpers only shows up in their error
    * messages, so it must name this function and not ufdbMalloc */
   ufdbGetMallocMutex( "ufdbRealloc" );
   p = realloc( ptr, elsize );
   ufdbReleaseMallocMutex( "ufdbRealloc" );

   return p;
}


/*
 * free() wrapper: releases ptr between ufdbGetMallocMutex() and
 * ufdbReleaseMallocMutex().  A NULL ptr is accepted and ignored without
 * touching the mutex.
 */
void ufdbFree( void * ptr )
{
   if (ptr == NULL)
      return;

   /* the name handed to the mutex helpers only shows up in their error
    * messages, so it must name this function and not ufdbMalloc */
   ufdbGetMallocMutex( "ufdbFree" );
   free( ptr );
   ufdbReleaseMallocMutex( "ufdbFree" );
}


/*
 * Duplicate the string s into memory obtained from ufdbMalloc().
 * Returns the copy, which the caller releases with ufdbFree(), or NULL when
 * s is NULL or no memory is available.
 */
char * ufdbStrdup( const char * s )
{
   size_t size;
   char * copy;

   if (s == NULL)
      return NULL;

   size = strlen( s ) + 1;		/* include the terminating '\0' */
   copy = (char *) ufdbMalloc( size );
   if (copy != NULL)
      memcpy( copy, s, size );

   return copy;
}


/*
 * regcomp() and regfree() call malloc() and free(), so in a multithreaded
 * application they are wrapped by the malloc mutex (UFDBregcomp, UFDBregfree).
 * Older GLIBC libraries additionally have a regexec() implementation which is
 * not thread-safe (see UFDBregexec).
 *
 * UFDBregcomp() compiles regex into preg with the given cflags and returns
 * the regcomp() result unchanged: 0 on success, a REG_* error code otherwise.
 */
int UFDBregcomp( regex_t * preg, const char * regex, int cflags )
{
   int status;

   ufdbGetMallocMutex( "UFDBregcomp" );
   status = regcomp( preg, regex, cflags );
   ufdbReleaseMallocMutex( "UFDBregcomp" );

   return status;
}


/*
 * Release the storage that regcomp() attached to preg.  The regfree() call
 * is made between ufdbGetMallocMutex() and ufdbReleaseMallocMutex() because
 * it frees memory.  The regex_t object itself is not freed.
 */
void UFDBregfree( regex_t * preg )
{
   static char self[] = "UFDBregfree";	/* caller name for mutex diagnostics */

   ufdbGetMallocMutex( self );
   regfree( preg );
   ufdbReleaseMallocMutex( self );
}


/*
 * NEED_REGEXEC_MUTEX is 1 only when building against a GNU C library older
 * than version 2.4; for newer GLIBC versions and for every other C library
 * it is 0.
 */
#if defined(__GLIBC__)  &&  (__GLIBC__ < 2  ||  (__GLIBC__ == 2  &&  __GLIBC_MINOR__ < 4))
#define NEED_REGEXEC_MUTEX 1
#else
#define NEED_REGEXEC_MUTEX 0
#endif

/*
 * regexec() wrapper with the same parameters and return value as regexec().
 * When NEED_REGEXEC_MUTEX is 1 the call is serialised: the malloc mutex is
 * taken first, then ufdb_regexec_mutex, and both are released in reverse
 * order afterwards.  Otherwise regexec() is called directly.
 */
int UFDBregexec( const regex_t * preg, const char * string, size_t nmatch, regmatch_t pmatch[], int eflags )
{
#if NEED_REGEXEC_MUTEX
   int status;

   ufdbGetMallocMutex( "UFDBregexec" );
   pthread_mutex_lock( &ufdb_regexec_mutex );
   status = regexec( preg, string, nmatch, pmatch, eflags );
   pthread_mutex_unlock( &ufdb_regexec_mutex );
   ufdbReleaseMallocMutex( "UFDBregexec" );

   return status;
#else
   return regexec( preg, string, nmatch, pmatch, eflags );
#endif
}


/*
 *  initialise a DB (open and/or create a .ufdb file) 
 */
int UFDBloadDatabase( 
   struct UFDBmemTable * mtable, 
   char *                file    )
{
   int                   n;
   int                   in;
   struct stat           fileStat;
   char                  f[1024];

   if (file == NULL)
   {
      return UFDB_API_ERR_NULL;
   }

#if 0
   fprintf( stderr, "   UFDBloadDatabase( %s )\n", file );
#endif

   strcpy( f, file );
   in = open( f, O_RDONLY );
   if (in < 0)
   {
      strcat( f, UFDBfileSuffix );
      in = open( f, O_RDONLY );
   }

   if (in < 0)
   {
      return UFDB_API_ERR_NOFILE;
   }

   if (fstat(in,&fileStat) < 0)
   {
      close( in );
      return UFDB_API_ERR_NOFILE;
   }

   mtable->table.tag = (unsigned char *) "UNIVERSE";
   mtable->table.nNextLevels = 0;
   mtable->table.nextLevel = NULL;
   mtable->mem = ufdbMalloc( fileStat.st_size + 1 );
   mtable->tableSize = fileStat.st_size - sizeof(struct UFDBfileHeader);
   n = read( in, mtable->mem, fileStat.st_size );
   close( in );
   if (n != fileStat.st_size)
   {
      return UFDB_API_ERR_READ;
   }

   UFDBparseTableHeader( mtable );
   UFDBparseTable( mtable );

   return UFDB_API_OK;
}



/*
 * Create a list node for one regular expression.
 *
 * The node gets its own copy of pattern and a freshly allocated regex_t into
 * which pattern is compiled with the given regcomp() flags.  The regcomp()
 * result is stored in the node's error member, so a node is returned even
 * when the pattern does not compile; callers must check re->error.
 *
 * Returns the new node, or NULL when pattern is NULL or memory runs out (in
 * which case nothing stays allocated).
 */
struct ufdbRegExp * ufdbNewPatternBuffer( 
  char * pattern, 
  int    flags )
{
  struct ufdbRegExp * re;
  size_t              size;

  if (pattern == NULL)
    return NULL;

  /* zero-filled, so members not set below do not hold random values */
  re = (struct ufdbRegExp *) ufdbCalloc( 1, sizeof(struct ufdbRegExp) );
  if (re == NULL)
    return NULL;

  size = strlen( pattern ) + 1;
  re->pattern = (char *) ufdbMalloc( size );
  re->compiled = (regex_t *) ufdbCalloc( 1, sizeof(regex_t) );
  if (re->pattern == NULL  ||  re->compiled == NULL)
  {
    ufdbFree( re->pattern );
    ufdbFree( re->compiled );
    ufdbFree( re );
    return NULL;
  }

  memcpy( re->pattern, pattern, size );
  re->substitute = NULL;
  re->error = UFDBregcomp( re->compiled, pattern, flags );
  re->flags = flags;
  re->global = 0;
  re->httpcode = NULL;
  re->next = NULL;

  return re;
}


/*
 * Initialize an expression list: read regular expressions from a file, one
 * per line, and compile each of them.
 *
 * Lines that start with '#' and empty lines are skipped.  A trailing "\n" or
 * "\r\n" is removed, and the last line is accepted without a line terminator.
 * Each remaining line is compiled case-insensitively as an extended regular
 * expression (REG_EXTENDED | REG_NOSUB | REG_ICASE).  New nodes are put in
 * front of the list, so *exprList ends up in reverse file order.  Expressions
 * that fail to compile stay in the list with their error member set.
 *
 * A line that does not fit in the line buffer is dropped as a whole and is
 * reported as an expression error.
 *
 * Returns
 *    UFDB_API_OK           all expressions were compiled,
 *    UFDB_API_ERR_NULL     exprList or file is NULL,
 *    UFDB_API_ERR_NOFILE   the file cannot be opened,
 *    UFDB_API_ERR_EXPR     at least one expression was too long, could not
 *                          be compiled or could not be allocated; *exprList
 *                          still holds every node that was created.
 */
int UFDBloadExpressions( 
   struct ufdbRegExp ** exprList,
   char *               file  )
{
   FILE *               fin;
   struct ufdbRegExp *  re;
   struct ufdbRegExp *  head;
   size_t               len;
   int                  c;
   int                  discarded;
   int                  retCode;
   char                 line[1024];

   if (exprList == NULL)
      return UFDB_API_ERR_NULL;
   *exprList = NULL;

   if (file == NULL)
      return UFDB_API_ERR_NULL;

   fin = fopen( file, "r" );
   if (fin == NULL)
      return UFDB_API_ERR_NOFILE;

   retCode = UFDB_API_OK;
   head = NULL;

   while (fgets( line, sizeof(line), fin ) != NULL)
   {
      len = strlen( line );

      /* A full buffer without a newline means the line may continue.  Drop the
       * remainder now, otherwise it would be read as a line of its own.
       */
      discarded = 0;
      if (len == sizeof(line) - 1  &&  line[len-1] != '\n')
      {
         while ((c = getc( fin )) != EOF  &&  c != '\n')
            discarded = 1;
      }

      if (line[0] == '#')         /* skip comments */
         continue;

      if (discarded)
      {
         retCode = UFDB_API_ERR_EXPR;	/* a truncated expression is unusable */
         continue;
      }

      /* get rid of the line terminator, be it "\n" or "\r\n" */
      while (len > 0  &&  (line[len-1] == '\n'  ||  line[len-1] == '\r'))
         line[--len] = '\0';

      /* an empty pattern would match every URL, so blank lines are skipped */
      if (len == 0)
         continue;

      re = ufdbNewPatternBuffer( line, REG_EXTENDED | REG_NOSUB | REG_ICASE );
      if (re == NULL)
      {
         retCode = UFDB_API_ERR_EXPR;
         break;
      }
      if (re->error)
         retCode = UFDB_API_ERR_EXPR;

      re->next = head;
      head = re;
   }

   *exprList = head;
   fclose( fin );

   return retCode;
}


/*
 * match a URL with a compiled RE.
 * return 0 if no match, 1 if there is a match.
 */
int ufdbRegExpMatch(   
   struct ufdbRegExp * regexp, 
   char *              str )
{
   struct ufdbRegExp * rp;
   int                 error;

   for (rp = regexp; rp != NULL; rp = rp->next)
   {
      if (rp->error)
         continue;
      error = UFDBregexec( rp->compiled, str, 0, 0, 0 );
#if 0
      fprintf( stderr, "ufdbRegExpMatch %s %s  %smatch\n", str, rp->pattern, error==0?"":"NO " );
#endif
      if (error == 0) 	/* match */
         return 1;
      if (error != REG_NOMATCH) 
         return UFDB_API_ERR_EXPR;
   }

   return 0;
}


void ufdbFreeRegExprList( struct ufdbRegExp * re )
{
   if (re == NULL)
      return;

   ufdbFreeRegExprList( re->next );

   ufdbFree( re->pattern );
   UFDBregfree( re->compiled );
   ufdbFree( re->compiled );
   ufdbFree( re->substitute );
   ufdbFree( re );
}


/*
 * Forget the CPU selection: cpu_mask becomes empty, which makes
 * ufdbSetThreadCPUaffinity() skip the affinity call.
 */
void ufdbResetCPUs( void )
{
   cpu_mask = (unsigned long) 0;
}


/*
 * Linux supports binding threads to a set of CPUs, which prevents cache
 * contention.  FreeBSD has no support, but the 5.x series is recommended for
 * best multithreaded performance.  On Solaris it is recommended to start
 * ufdbguardd in a processor set.
 *
 * ufdbSetThreadCPUaffinity() applies cpu_mask, as built by ufdbSetCPU(), with
 * the sched_setaffinity system call using 0 as the id, which denotes the
 * caller.  Nothing is done when cpu_mask is empty or when the system call is
 * not available at compile time.  thread_num is not used.
 *
 * Returns UFDB_API_OK, or UFDB_API_ERR_ERRNO when the system call fails
 * (errno then holds the reason).
 */
int ufdbSetThreadCPUaffinity( int thread_num )
{
   (void) thread_num;

#if defined(__linux__) && defined(SYS_sched_setaffinity)
   if (cpu_mask != 0UL)
   {
      /* The length must be the real size of cpu_mask: a fixed 4 would hand
       * the kernel the wrong half of an 8-byte mask on big-endian systems.
       * syscall() is variadic, so integer arguments are passed as long.
       */
      if (syscall( SYS_sched_setaffinity, 0L, (unsigned long) sizeof(cpu_mask), &cpu_mask ) < 0)
         return UFDB_API_ERR_ERRNO;
   }
#endif

   return UFDB_API_OK;
}


/*
 * Bind my processes and threads to a set of CPUs.
 * This increases the cache efficiency for all programs.
 */
int ufdbSetCPU( 
   char * CPUspec )		/* comma-separated CPU numbers (starting with CPU 0) */
{
   int    cpu;

#if defined(_SC_NPROCESSORS_ONLN)
   num_cpus = sysconf( _SC_NPROCESSORS_ONLN );
   /* printf( "this system has %ld CPUs\n", num_cpus ); */

#elif defined(__linux__) && defined(SYS_sched_getaffinity)
   /* sched_getaffinity() is buggy on linux 2.4.x so we use syscall() instead */
   cpu = syscall( SYS_sched_getaffinity, getpid(), 4, &cpu_mask );
   /* printf( "sched_getaffinity returned %d %08lx\n", cpu, cpu_mask ); */
   if (cpu >= 0)
   {
      num_cpus = 0;
      for (cpu = 0; cpu < UFDB_MAX_CPUS; cpu++)
	 if (cpu_mask & (1 << cpu))
	    num_cpus++;
      /* printf( "   found %d CPUs in the cpu mask\n", num_cpus ); */
   }
   else
#else
      num_cpus = UFDB_MAX_CPUS;
#endif

   cpu_mask = 0;
   while (*CPUspec)
   {
      if (sscanf(CPUspec,"%d",&cpu) == 1)
      {
	 if (cpu >= 0 && cpu < num_cpus)
	 {
	    cpu_mask |= (1 << cpu);
	 }
	 else
	    return UFDB_API_ERR_RANGE;
      }
      else
         return UFDB_API_ERR_RANGE;

      /* find the next CPU number */
      while (isdigit( (int) *CPUspec))
         CPUspec++;
      while (*CPUspec == ' '  ||  *CPUspec == ',')
         CPUspec++;
   }

   return UFDB_API_OK;
}


/*
 * A bitmap with IP addresses of clients is used to count the number of clients.
 * To keep the bitmap small (2 MB) the first octet of the IP address is ignored.
 *
 * This code assumes that there are 8 bits per byte.
 */

#define BITMAPLENGTH (256U * 256U * 256U / (sizeof(unsigned int) * 8U))
static unsigned int IPbitmap[BITMAPLENGTH];

/* number of distinct keys (the 3 low octets of an IPv4 address) */
#define UFDB_IP_KEYSPACE      (256U * 256U * 256U)
/* number of keys stored in one element of IPbitmap */
#define UFDB_IP_BITS_PER_WORD ((unsigned int) (sizeof(unsigned int) * 8U))

/*
 * Register one client in the bitmap.
 *
 * We can receive from Squid an IP address (most common) or a FQDN.
 * An address that starts with a digit is taken to be a dotted IPv4 address:
 * the first octet is skipped and the next three octets form the key.
 * Anything else is hashed and the hash is used as a pseudo IP address.
 * The key is reduced to the size of the bitmap, so malformed addresses
 * (e.g. octets above 255) still select a valid bit.
 * A NULL address is ignored.
 */
void UFDBregisterCountedIP( const char * address )
{
   const unsigned char * a;
   unsigned int          octet[3];
   unsigned int          key;
   unsigned int          bit;
   size_t                word;
   int                   k;

   if (address == NULL)
      return;
   a = (const unsigned char *) address;

   /* extremely simple way of looking if the parameter is an IP address... */
   if (*a >= '0' && *a <= '9')
   {
      /* first skip the first octet */
      while (*a != '.'  && *a != '\0')
         a++;

      for (k = 0; k < 3; k++)
      {
         if (*a == '.')
            a++;
         octet[k] = 0;
         while (*a >= '0' && *a <= '9')
         {
            octet[k] = octet[k] * 10U + (unsigned int) (*a - '0');
            a++;
         }
      }
      key = (octet[0] << 16) + (octet[1] << 8) + octet[2];
   }
   else 	/* no IP address, calculate a hash */
   {
      key = 104729U;
      while (*a != '\0')
      {
         key = key * 17U + (*a - ' ');
         a++;
      }
   }

   /* Reduce the key before it is split into word and bit: both are then in
    * range by construction, also for non-existent IP addresses.
    */
   key %= UFDB_IP_KEYSPACE;
   word = key / UFDB_IP_BITS_PER_WORD;
   bit = key % UFDB_IP_BITS_PER_WORD;

   /* 
    * To be thread-safe we should use a semaphore here.
    * But since this counter is not critical and a lost bit-set 
    * will probably be covered by another call to this function,
    * we choose for superfast code and skip the use of a semaphore.
    */
   IPbitmap[word] |= (1U << bit);
}


/*
 * Forget all registered clients: every bit of the bitmap is cleared.
 */
void UFDBinitializeIPcounters( void )
{
   memset( IPbitmap, 0, sizeof(IPbitmap) );
}


/*
 * Return the number of bits that are set in the bitmap, i.e. the number of
 * distinct keys registered since the last UFDBinitializeIPcounters().
 */
unsigned long UFDBgetNumberOfRegisteredIPs( void )
{
   unsigned long n;
   unsigned int  v;
   size_t        i;

   n = 0;
   for (i = 0; i < BITMAPLENGTH; i++)
   {
      /* "v &= v - 1" clears the lowest set bit, so this loops once per set bit */
      for (v = IPbitmap[i]; v != 0; v &= v - 1U)
         n++;
   }

   return n;
}


/*
 * Install handler as the handler for signal number signum.
 *
 * With HAVE_SIGACTION the handler is installed with sigaction(): no extra
 * signals are blocked while the handler runs, SA_RESTART is requested and for
 * SIGCHLD also SA_NOCLDSTOP.  Otherwise signal() is used when HAVE_SIGNAL is
 * set.  When neither is available, or when installing fails, an error is
 * logged.
 *
 * The parameter is not called "signal" because that name would hide the
 * signal() function inside this function.
 */
void ufdbSetSignalHandler( int signum, void (*handler)(int) )
{
#if HAVE_SIGACTION
   struct sigaction act;

#ifndef SA_NODEFER
#define SA_NODEFER 0
#endif

#ifndef SA_NOCLDSTOP
#define SA_NOCLDSTOP 0
#endif

#ifndef SA_RESTART
#define SA_RESTART 0
#endif

   /* struct sigaction may have more members than the three set below */
   memset( &act, 0, sizeof(act) );
   act.sa_handler = handler;
   sigemptyset( &act.sa_mask );
   act.sa_flags = SA_RESTART;
   if (signum == SIGCHLD)
      act.sa_flags |= SA_NOCLDSTOP;

   if (sigaction( signum, &act, NULL ) != 0)
      ufdbLogError( "ufdbSetSignalHandler: cannot set handler for signal %d: %s", signum, strerror(errno) );

#elif HAVE_SIGNAL
   if (signal( signum, handler ) == SIG_ERR)
      ufdbLogError( "ufdbSetSignalHandler: cannot set handler for signal %d: %s", signum, strerror(errno) );

#else
   ufdbLogError( "ufdbSetSignalHandler: cannot set handler for signal %d", signum );
#endif
}


/*
 * Stop running as root and continue as user username.
 *
 * Nothing is done when username is NULL or empty.  When the process does not
 * run with effective uid 0, or when the user cannot be found, an error is
 * logged and the process keeps its current identity.
 *
 * The group is changed before the user: once the effective uid is no longer
 * root, the process is not allowed to change its group any more.
 *
 * NOTE(review): seteuid() changes the effective uid only, so the saved uid
 * stays root and the process is able to switch back.  The supplementary
 * groups of root are kept as well.  If dropping privileges is meant to be
 * permanent, setuid() and initgroups() should be considered; confirm that no
 * other code relies on regaining root first.
 */
void UFDBdropPrivileges( const char * username )
{
   struct passwd   pws;
   struct passwd * found;
   char            pws_fields[1024];
   int             rc;

   if (username == NULL  ||  username[0] == '\0')
      return;

   if (geteuid() != 0)
   {
      ufdbLogError( "UFDBdropPrivileges: i am not root so cannot run as user %s", username );
      return;
   }

   /* getpwnam_r() also returns 0 when the user does not exist; in that case
    * it sets the result pointer to NULL and pws holds no valid data
    */
   found = NULL;
   rc = getpwnam_r( username, &pws, pws_fields, sizeof(pws_fields), &found );
   if (rc != 0  ||  found == NULL)
   {
      ufdbLogError( "Cannot get info on user `%s' so cannot run as this user.", username );
      ufdbLogMessage( "Still running as root.  user %s probably does not exist.", username );
      return;
   }

   /* best effort: a failure is reported, but the uid is dropped nevertheless */
   if (setgid( found->pw_gid ) != 0)
   {
      ufdbLogError( "Cannot switch to group %ld of user %s: %s", 
                    (long) found->pw_gid, username, strerror(errno) );
   }

   if (seteuid( found->pw_uid ) != 0)
   {
      ufdbLogError( "Cannot drop privileges and become user %s: %s", username, strerror(errno) );
      return;
   }

   if (UFDBglobalDebug)
      ufdbLogMessage( "Privileges are dropped: now running as user `%s'", username );
}

