/*
  By accepting this notice, you agree to be bound by the following
  agreements:
  
  ufdbGuard is copyrighted (C) 2005,2006,2007 by URLfilterDB with all rights reserved.
  ufdbGuard is based on squidGuard.

  squidGuard is copyrighted (C) 1998 by
  ElTele st AS, Oslo, Norway, with all rights reserved.
  
  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License (version 2) as
  published by the Free Software Foundation.  It is distributed in the
  hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  PURPOSE.  See the GNU General Public License (GPL) for more details.
  
  You should have received a copy of the GNU General Public License
  (GPL) along with this program.
*/


#include "ufdb.h"
#include "sg.h"

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <signal.h>
#include <pthread.h>

#include "ufdblib.h"

/* Globals defined in other translation units. */
extern int sig_hup;           /* written by sgHandlerSigHUP(): last signal number it received */
extern int sig_other;           /* written by sgSignalHandler(): last signal number it received */
extern int sig_alrm;           /* set to 1 by sgAlarm() */
extern char ** globalArgv;           /* not referenced in the code visible in this part of the file */
extern char ** globalEnvp;           /* not referenced in the code visible in this part of the file */
extern struct Acl * defaultAcl; /* sgParseRedirect() falls back to defaultAcl->pass */

extern struct Source * Source; /* not referenced in the code visible in this part of the file */
extern struct Destination * Dest; /* not referenced in the code visible in this part of the file */


/*
 * Signal handler that records the received signal number in sig_hup.
 * If sig_hup already holds the same number, i.e. the previous occurrence
 * has not been cleared yet, the process is aborted.
 */
void sgHandlerSigHUP( int s )
{
  if (s != sig_hup)
  {
     sig_hup = s;
     return;
  }

  abort();
}


/*
 * Signal handler that records the received signal number in sig_other.
 * If sig_other already holds the same number, i.e. the previous occurrence
 * has not been cleared yet, the process is aborted.
 */
void sgSignalHandler( int s )
{
  if (s != sig_other)
  {
     sig_other = s;
     return;
  }

  abort();
}


/*
 * Handler with a signal-handler signature (presumably installed for SIGALRM --
 * confirm at the place where it is registered).  It raises the sig_alrm flag
 * and then calls sgTimeNextEvent().  The signal number is not used.
 */
void sgAlarm( int signal )
{
  (void) signal;		/* unused */

  sig_alrm = 1;
  sgTimeNextEvent();
}


#if 0	
/*
 * Debug helper, compiled out by the surrounding #if 0.
 * Prints the fields of a struct SquidInfo to stderr, one per line, and
 * passes s->revUrl to UFDBprintRevURL().
 */
static void printSquidInfo( struct SquidInfo * s )
{
  fprintf( stderr, "   protocol = %s\n", s->protocol );
  fprintf( stderr, "   port     = %d\n", s->port     );
  fprintf( stderr, "   dot      = %d\n", s->dot      );
  UFDBprintRevURL( s->revUrl );
  fprintf( stderr, "   domain   = %s\n", s->domain       );
  fprintf( stderr, "   orgdomain= %s\n", s->orig_domain  );
  fprintf( stderr, "   url      = %s\n", s->url          );
  fprintf( stderr, "   orig     = %s\n", s->orig         );
  fprintf( stderr, "   surl     = %s\n", s->surl         );
  fprintf( stderr, "   src      = %s\n", s->src          );
  fprintf( stderr, "   srcDom   = %s\n", s->srcDomain    );
  fprintf( stderr, "   ident    = %s\n", s->ident        );
  fprintf( stderr, "   method   = %s\n", s->method       );
  fprintf( stderr, "   urlgroup = %s\n", s->urlgroup     );
}
#endif


/*
 * Parses one request line as sent by Squid to a redirector:
 *    URL ip-address/fqdn ident method [urlgroup]
 * Squid pre2.6: http://www.sex.com 10.1.1.44/- - GET
 * Squid 2.6.x:  http://www.sex.com 10.1.1.44/- - GET -    (urlgroup added)
 *
 * line is tokenised in place with strtok_r() and the URL token itself is
 * modified (lowercased, port number removed), so it must be writable.
 *
 * Fields of *s that are filled in:
 *    orig         the URL token as received (cut after "/URLblocked.cgi");
 *                 a '/' is appended when the URL has neither a path nor a port
 *    protocol     text before "://", "http" if absent, "https" if the port is 443
 *    port         80 by default, 443 for https, or the number after ':'
 *    url          lowercased URL without protocol, with %xx decoded and
 *                 "//" and "/./" collapsed to "/"
 *    orig_domain  host name without the user@ part
 *    domain       host name with a leading www. / ftp. (optionally numbered) removed
 *    surl         same as domain but including the rest of the URL
 *    dot          1 if domain consists of digits and dots only
 *    src, srcDomain, ident, method, urlgroup   the remaining tokens
 *
 * Returns 1 on success; s->revUrl is then set to the result of UFDBgenRevURL().
 * Returns 0 if the line has no URL token, no domain or no method; s->revUrl
 * is NULL in that case.
 *
 * NOTE(review): the copies into the fields of *s are not length-checked.
 * The field sizes are declared outside this file, so the caller has to
 * guarantee that no token is longer than the field it ends up in -- confirm.
 */
int parseLine( 
  UFDBthreadAdmin *  admin, 
  char *             line, 
  struct SquidInfo * s )
{
  char * p, *d, *a, *e, *o, *field;
  char * up, *upp;
  int    i; 
  char * lineptr;

  s->revUrl = NULL;
  s->quota = NULL;
  s->dot = 0;
  s->port = 80;
  s->orig[0] = 
    s->url[0] = s->domain[0] = s->orig_domain[0] = s->src[0] = s->ident[0] = s->ident[1] =
    s->protocol[0] = s->method[0] = s->urlgroup[0] = s->srcDomain[0] = s->surl[0] =  '\0';

  field = strtok_r( line, "\t ", &lineptr );
  if (field == NULL)
    return 0;

  /* prevent loops with regular expression matching: */
  p = strstr( field, "/URLblocked.cgi" );
  if (p != NULL)
     *(p+15) = '\0';			/* 15 == strlen("/URLblocked.cgi") */

  /* The original URL is kept complete, including any '?...' or '&...' part. */
  strcpy( s->orig, field );

  /* Convert the URL to lowercase chars.
   * The query part is deliberately not stripped: doing so made regular
   * expression matches fail, e.g. for Google's cache
   * (http://64.233.169.104/search?q=cache:OK6uOgz2pp0J:www.xs4all.nl/).
   */
  for (p = field; *p != '\0'; p++)
  {
    if (*p >= 'A'  &&  *p <= 'Z')
      *p += 'a' - 'A';
  }

  p = strstr( field, "://" );
  if (p == NULL)			/* no protocol, defaults to http */
  {
    strcpy( s->protocol, "http" );
    p = field;
  }
  else
  {
    i = p - field;
    if (i > 15)
       i = 15;				/* WHOEHA a large protocol name. truncate it */
    strncpy( s->protocol, field, i );
    s->protocol[i] = '\0';
    if (strcmp( s->protocol, "https" ) == 0)
       s->port = 443;
    p += 3;
    field = p;
  }

  /* do some url decoding */

  up = field;
  upp = s->url;
  i = 0;
  while (up[i] != '\0') 
  {
    if (up[i] == '%')
    {
      /* <ctype.h> functions require a value in the range of unsigned char */
      if (isxdigit( (unsigned char) up[i+1] )  &&  isxdigit( (unsigned char) up[i+2] ))
      {
        int    hex;
        char   h1, h2;

        /* note that the URL is already converted to lowercase */
        h1 = up[i+1];
        h2 = up[i+2];
        hex  = (h1 <= '9') ? h1 - '0' : h1 - 'a' + 10;
        hex *= 16;
        hex += (h2 <= '9') ? h2 - '0' : h2 - 'a' + 10;

        /* don't convert control characters */
        if (hex <= 0x20)
        {
           *upp++ = up[i++];
           *upp++ = up[i++];
           *upp++ = up[i];
        }
        else
        { 
           if (hex >= 'A'  &&  hex <= 'Z')
              hex += 'a' - 'A';
           *upp++ = hex;
           i += 2;
        }
      }
      else
      {
        /* An erroneous hex code: copy the '%' and up to two following
         * characters verbatim.  The URL may end right after the '%' or one
         * character later, so never step over the terminating '\0'.
         */
        *upp++ = up[i];
        if (up[i+1] != '\0')
        {
           *upp++ = up[++i];
           if (up[i+1] != '\0')
              *upp++ = up[++i];
        }
      }
    }
    else
    {
      while (up[i] == '/')
      {
         if (up[i+1] == '/')					/* substitute // by / */
            i++;
         else if (up[i+1] == '.'  && up[i+2] == '/')		/* substitute /./ by / */
            i += 2;
         else
            break;
      }
      *upp++ = up[i];
    }
    i++;
  }
  *upp++ = up[i];			/* up[i] is the terminating '\0' here */
  *upp = '\0';

  d = strchr(p,'/'); 		/* find domain end */
  e = d;
  a = strchr(p,'@'); 		/* find auth  */
  if (a != NULL  &&  (d == NULL || a < d)) 
    p = a + 1;

  a = strchr(p,':'); 		/* find port */
  if (a != NULL  &&  (d == NULL || a < d))
  {
    s->port = 0;
    o = a + 1;
    while (isdigit( (unsigned char) *o ))
    {
       /* all digits are skipped, but stop accumulating once the value is
        * beyond any valid port number so that the int cannot overflow */
       if (s->port <= 65535)
          s->port = s->port * 10 + (*o - '0');
       o++;
    }
    memmove( a, o, strlen(o)+1 );	/* remove the port number from the URL */
    e = a;
  }

  /* how ugly, Squid does not send the https protocol string */
  if (s->port == 443)
     strcpy( s->protocol, "https" );

  if (e == NULL)
     strcpy( s->orig_domain, p );
  else
  {
     i = e - p;
     strncpy( s->orig_domain, p, i );
     s->orig_domain[i] = '\0';
  }
  
  /* strip prefix www. www8. ftp. ftp5. ... */
  o = p;

  if ((p[0] == 'w' && p[1] == 'w' && p[2] == 'w') ||		/* match www. */
      (p[0] == 'f' && p[1] == 't' && p[2] == 'p'))  		/* match ftp. */
  {
    p += 3;
    while (p[0] <= '9' && p[0] >= '0')              		/* match www99. */
      p++;
    if (p[0] != '.'  ||  strchr( p+1, '.' ) == NULL)		/* skip www_xxx.com and www99.com */
      p = o;  /* not a hostname */
    else
      p++;
  }

  if (e == NULL)
  {
     strcpy( s->domain, p );	

     /* The original URL will have &blah appended when safe-search is ON
      * and therefore we must make sure that URLs that only have the domain name,
      * have a / before the &blah, i.e the result becomes hotbot.com/&adf=on 
      */
     strcat( s->orig, "/" );
  }
  else 
  {
    memcpy( s->domain, p, e - p );	
    *(s->domain + (e - p)) = '\0';
  }
  strcpy( s->surl, p );

  /* a domain that consists of digits and dots only is a dotted address */
  for (o = s->domain; isdigit( (unsigned char) *o ) || *o == '.'; o++)
     ;
  if (*o == '\0')
    s->dot = 1;

  if ((p = strtok_r(NULL," \t\n",&lineptr)) != NULL)		/* parse ip-address/fqdn */
  {
    o = strchr(p,'/');
    if (o != NULL) 
    {
      memcpy( s->src, p, o-p );
      s->src[o-p] = '\0';
      strcpy( s->srcDomain, o+1 );
      if (*s->srcDomain == '-')
        s->srcDomain[0] = '\0';
    }
    else
    {
      strcpy( s->src, p );
      s->srcDomain[0] = '\0';	/* TODO: check */
    }

    if ((p = strtok_r(NULL," \t\n",&lineptr)) != NULL)		/* parse ident */
    {
      if (p[0] != '-'  ||  p[1] != '\0')
      {
        strcpy( s->ident, p );
        for (p = s->ident; *p != '\0'; p++) 	/* convert ident to lowercase chars */
        {
          if (*p >= 'A'  &&  *p <= 'Z')
            *p += 'a' - 'A';
        }
      }

      if ((p = strtok_r(NULL," \t\n",&lineptr)) != NULL)	/* parse method, e.g. GET/CONNECT */
         strcpy( s->method, p );

      if ((p = strtok_r(NULL," \t\n",&lineptr)) != NULL)	/* parse optional urlgroup (new in squid 2.6) */
         strcpy( s->urlgroup, p );
      else
         strcpy( s->urlgroup, "#" );
    }
  }

  if (s->domain[0] == '\0')
    return 0;
  if (s->method[0] == '\0')
    return 0;

  /* s->revUrl is pointing to malloced memory and must be freed at the end of the main loop ! */
  s->revUrl = UFDBgenRevURL( admin, (unsigned char *) s->surl );

  return 1;
}


/* 
 *  Returns a pointer into domain that skips the host part of a
 *  fully-qualified hostname, keeping the last two labels and any path:
 *     www.abc.xyz.dom/index.html -> xyz.dom/index.html
 *  Only dots in front of the first '/' are considered, unless there is no '/'
 *  or the string starts with one.  If fewer than two such dots exist, domain
 *  itself is returned.
 */
char * sgSkipHostPart( char * domain )
{
  char * slash    = strchr( domain, '/' );
  char * lastDot  = NULL;
  char * prevDot  = NULL;
  char * scan;
  int    hasPath  = (slash != NULL  &&  slash != domain);

  for (scan = strchr( domain, '.' );  scan != NULL;  scan = strchr( scan + 1, '.' ))
  {
    if (hasPath  &&  scan > slash)	/* this dot belongs to the path */
      break;
    prevDot = lastDot;
    lastDot = scan;
  }

  return (prevDot != NULL) ? prevDot + 1 : domain;
}

/*
 * Checks the validity of a dotted address and converts it.
 *
 * dot     text of the form "a.b.c.d", decimal octets separated by dots
 * ipaddr  receives the value: the first octet is stored in bits 31..24,
 *         the second in bits 23..16, and so on
 *
 * Returns ipaddr on success.  Returns NULL if an octet does not start with
 * a digit, is larger than 255, is followed by anything other than '.' or
 * the end of the string, or if there are more than four octets; *ipaddr
 * then holds the octets converted so far.
 * Fewer than four octets, a trailing dot and the empty string are accepted.
 */
ulong * sgConvDot( char * dot, ulong * ipaddr )
{
   char * t;
   ulong  octet;
   int    shift;

   *ipaddr = 0;
   shift = 24;
   t = dot;
   while (*t != '\0')
   {
      if (*t > '9'  ||  *t < '0')
         return NULL;

      octet = 0;
      while (*t >= '0'  &&  *t <= '9') 
      {
         octet = octet*10 + (ulong) (*t - '0');
         if (octet > 255)		/* reject early: a long run of digits must not overflow */
            return NULL;
         t++;
      }

      if (*t == '.') 
         t++;
      else if (*t != '\0') 
         return NULL;

      if (shift < 0)
         return NULL;

      /* octet is unsigned: shifting a value >= 128 by 24 as an int would
       * overflow and sign-extend into the upper half of a 64-bit ulong */
      *ipaddr |= octet << shift;
      shift -= 8;
   }

   return ipaddr;
}


/*
 * Compares two strings from the end towards the beginning.
 * Returns 0 if they are equal.  If one string is used up while the other
 * still has characters left, -1 is returned when a is the shorter one and
 * 1 when b is; this holds even when the characters at that position differ.
 * Otherwise the difference of the first differing characters is returned.
 */
int sgStrRcmp( char * a, char * b )
{
  char * pa = a + strlen( a );
  char * pb = b + strlen( b );

  /* walk back over the common tail, but never past the first character */
  while (*pa == *pb  &&  pa != a  &&  pb != b)
  {
    pa--;
    pb--;
  }

  /* exactly one of the strings is at its first character */
  if ((pa == a) != (pb == b))
    return (pa == a) ? -1 : 1;

  return *pa - *pb;
}


/*
 * Like sgStrRcmp() but compares at most the last blen characters.
 * When blen characters have been compared, the difference of the characters
 * at that position is returned (0 if they are equal), regardless of whether
 * one of the strings is used up.
 */
int sgStrRncmp( char * a, char * b, int blen )
{
  char * pa   = a + strlen( a );
  char * pb   = b + strlen( b );
  int    left = blen;

  /* walk back over the common tail, but never past the first character */
  while (left > 0  &&  *pa == *pb  &&  pa != a  &&  pb != b)
  {
    pa--;
    pb--;
    left--;
  }

  /* the budget is not used up and exactly one string is at its first character */
  if (left != 0  &&  (pa == a) != (pb == b))
    return (pa == a) ? -1 : 1;

  return *pa - *pb;
}


/*
 *  sgDomStrRcmp checks if p2 is equal to or a subdomain of p1.
 *
 *  The strings are compared from the end.  Returns 0 if p2 equals p1 or if
 *  p2 ends with '.' followed by p1 (p2 is a subdomain of p1).  Any other
 *  outcome is non-zero:
 *    - p2 is a proper suffix of p1: minus the character of p1 in front of it
 *    - p1 is a proper suffix of p2 that is not preceded by '.': minus the
 *      character of p2 in front of it
 *    - otherwise the difference of the first differing characters.
 */
int sgDomStrRcmp( char * p1, char * p2 )
{
  size_t n1 = strlen( p1 );
  size_t n2 = strlen( p2 );

  /* Strip the common tail.  Indices are used instead of pointers so that
   * nothing ever points in front of the start of a string. */
  while (n1 > 0  &&  n2 > 0  &&  p1[n1-1] == p2[n2-1])
  {
    n1--;
    n2--;
  }

  if (n1 == 0  &&  n2 == 0)
    return 0;
  if (n2 == 0)
    return -p1[n1-1];
  if (n1 == 0)
  {
    if (p2[n2-1] == '.')	/* found a subdomain */
      return 0;
    /* p1 is used up, so there is no character of p1 left to compare with */
    return -p2[n2-1];
  }
  return p1[n1-1] - p2[n2-1];
}


/*
 *  Regexp functions
 */


/*
 * Applies the first matching expression of a list of rewrite rules to pattern.
 *
 * regexp     list (linked by ->next) of compiled expressions with their
 *            substitute text
 * pattern    the string to match
 * newstring  output buffer; the bound checks below use MAX_BUF as its size
 *            (NOTE(review): confirm that every caller passes at least
 *            MAX_BUF bytes)
 *
 * The rules are tried in list order.  For the first rule that matches,
 * newstring receives re->httpcode (if set), the text in front of the match,
 * re->substitute and the text after the match.  If re->global is set, further
 * matches in the remaining text are substituted as well.  A piece that would
 * not fit in MAX_BUF bytes ends the substitution; the remaining input is then
 * appended only if it still fits.
 *
 * Returns newstring if a rule matched, NULL otherwise (newstring is then an
 * empty string).
 */
char * ufdbRegExpSubst( struct ufdbRegExp * regexp, char * pattern, char * newstring )
{
  struct ufdbRegExp * re;
  regmatch_t        pm;
  size_t            used;		/* strlen(newstring), kept up to date */
  size_t            substlen;
  size_t            restlen;

  newstring[0] = '\0';
  for (re = regexp; re != NULL; re = re->next)
  {
    if (UFDBregexec(re->compiled, pattern, 1, &pm, 0) != 0) 
      continue;

    substlen = strlen( re->substitute );
    if (re->httpcode != NULL)
      strcpy( newstring, re->httpcode );
    used = strlen( newstring );

    /* The length is tracked explicitly: strcat() and strncat() return the
     * destination, not its end, so their result cannot be used for this. */
    do {
      if (used + (size_t) pm.rm_so >= (size_t) MAX_BUF)
        break;
      memcpy( newstring + used, pattern, (size_t) pm.rm_so );
      used += (size_t) pm.rm_so;
      newstring[used] = '\0';

      if (used + substlen >= (size_t) MAX_BUF)
        break;
      memcpy( newstring + used, re->substitute, substlen + 1 );
      used += substlen;

      pattern = pattern + pm.rm_eo;
      if (pm.rm_eo == 0)		/* empty match at the start: no progress is possible */
        break;
    } while (UFDBregexec(re->compiled, pattern, 1, &pm, REG_NOTBOL) == 0 && re->global);

    restlen = strlen( pattern );
    if (used + restlen < (size_t) MAX_BUF)
      memcpy( newstring + used, pattern, restlen + 1 );

    return newstring;
  }

  return NULL;
}


/*
 * Applies the expression list of a rewrite rule to subst.
 * Thin wrapper: passes rewrite->rewrite, subst and newstring on to
 * ufdbRegExpSubst() and returns its result.
 */
char * sgRewriteExpression( struct sgRewrite * rewrite, char * subst, char * newstring )
{
  return ufdbRegExpSubst( rewrite->rewrite, subst, newstring );
}


/*
 *  Expand a redirection URL string: fill in the %i, %n etc.
 *
 *  Escapes:
 *    %a  source address (req->src)
 *    %A  value of the "administrator" setting, or a default text if it is
 *        not set; space = & \ and " are written as %XX
 *    %i  user ident, "unknown" if empty or "-"
 *    %n  source domain name, "unknown" if empty or "-"
 *    %p  path of the original URL: the text after the first '/' that
 *        follows "//"
 *    %f  text after the last '/' of the original URL
 *    %q  user quota information, or "noquota"
 *    %s  name of the matched source, or "default"
 *    %t  name of the matched destination; "in-addr", "none" or "unknown"
 *        if it has no name
 *    %u  the original URL (req->orig)
 *  Any other character after a '%' yields a single '%' and is dropped itself.
 *  A '%' at the very end of the string is copied as is.
 *
 *  redirect   the template
 *  req        the parsed request
 *  acl        the matching acl; acl->source is read for %s
 *  aclpass    the matching destination, defaultAcl->pass is used if NULL
 *  buf        output buffer
 *  force_302  if non-zero, "302:" is put in front of the result unless the
 *             template already starts with it
 *
 *  Returns buf, or redirect itself if nothing was written to buf.
 *
 *  NOTE(review): nothing in here is checked against the size of buf, which
 *  is not known in this function; the caller has to supply a buffer that is
 *  large enough for the expanded string -- confirm at the call sites.
 */
char * sgParseRedirect( 
  char *             redirect,
  struct SquidInfo * req,
  struct Acl *       acl, 
  struct AclDest *   aclpass,
  char *             buf,
  int                force_302 )
{
  char * p;
  char * q;
  char * t;
  char * d;

#ifdef UFDB_USERQUOTA_SUPPORT
  struct Source * s = req->lastActiveSource;
#endif

  if (aclpass == NULL)
    aclpass = defaultAcl->pass;

  buf[0] = '\0';
  if (force_302  &&  strncmp( redirect, "302:", 4 ) != 0)
  {
     strcpy( buf, "302:" );
  }

  q = redirect;				/* start of the literal text that is not copied yet */
  p = redirect;
  while ((p = strchr(p,'%')) != NULL)
  {
    /* Append the literal text in front of this escape.  Appending, instead
     * of copying to the start of buf, keeps the "302:" prefix intact. */
    strncat( buf, q, p - q );

    p++;
    if (*p == '\0')			/* a lone '%' at the end of the template */
    {
      strcat( buf, "%" );
      q = p;
      break;
    }

    switch (*p)
    {
    case 'a': 			/* Source Address */
      strcat( buf, req->src );
      break;

    case 'A':			/* Administrator */
       t = ufdbSettingGetValue( "administrator" );
       if (t == NULL)
          t = "your system administrator or helpdesk";
       d = buf + strlen( buf );
       while (*t != '\0')
       {
          if (*t == ' ' || *t == '=' || *t == '&' || *t == '\\' || *t == '"')
          {
             char h;

             *d++ = '%';
             h = *t / 16;
             *d++ = (h >= 10) ? ('A' - 10 + h) : ('0' + h);
             h = *t % 16;
             *d++ = (h >= 10) ? ('A' - 10 + h) : ('0' + h);
          }
          else
             *d++ = *t;
          t++;
       }
       *d = '\0';
       break;

    case 'i': 			/* Source User Ident */
      if ((req->ident[0] == '\0') || (req->ident[0] == '-'  &&  req->ident[1] == '\0'))
      {
        strcat( buf, "unknown" );
      }
      else
      {
        strcat( buf, req->ident );
      }
      break;

    case 'q': 				/* userquota info */
#ifdef UFDB_USERQUOTA_SUPPORT
      if (s != NULL  && 
          s->userquota.seconds != 0  &&  
          strcmp(req->ident, "-") != 0) 
      {
        struct UserQuotaInfo * userquota;
        if (sgDbLookup( s->userDb, req->ident, (char **) (char *) &userquota ) == 1) 
        {
          char qbuf[150];
          sprintf( qbuf, "%d-%d-%d-%d-%d-%d", 
                   s->userquota.renew, (int) s->userquota.seconds, userquota->status,
                   (int) userquota->time, (int) userquota->last, userquota->consumed );
          strcat(buf, qbuf);
        }
        else
        {
          strcat( buf, "noquota" );
        }
      }
      else
      {
        strcat( buf, "noquota" );
      }
#else
      strcat( buf, "noquota" );
#endif  /* UFDB_USERQUOTA_SUPPORT */
      break;

    case 'n': 		/* Source Domain Name */
      if ((req->srcDomain[0] == '\0') || (req->srcDomain[0] == '-'  &&  req->srcDomain[1] == '\0'))
      {
        strcat(buf, "unknown" );
      }
      else
      {
        strcat( buf, req->srcDomain );
      }
      break;

    case 'p': 		/* The url path */
      if ((t = strstr(req->orig,"//")) != NULL)
      {
        t += 2;
        if ((t = strchr(t,'/')) != NULL)
        {
          t++;
          strcat( buf, t );
        }
      }
      break;

    case 'f': 		/* The url file */
      if ((t = strrchr(req->orig,'/')) != NULL)
      {
        t++;
        strcat( buf, t );
      }
      break;

    case 's': 		/* Source Class Matched */
      if (acl->source == NULL || acl->source->name == NULL)
        strcat( buf, "default" );
      else
        strcat( buf, acl->source->name );
      break;

    case 't': 		/* Target Class Matched */
      if (aclpass == NULL)
      {
        strcat( buf, "unknown" );
      }
      else if (aclpass->name == NULL)
      {
        if (aclpass->type == ACL_TYPE_INADDR)
          strcat( buf, "in-addr" );
        else if (aclpass->type == ACL_TYPE_TERMINATOR)
          strcat( buf, "none" );
        else
          strcat( buf, "unknown" );
      }
      else
      {
        strcat( buf, aclpass->name );
      }
      break;

    case 'u': 		/* Requested URL */
      strcat( buf, req->orig );
      break;

    default:
      strcat( buf, "%" );
      break;
    }
    p++;
    q = p;
  }

  /* Append the literal text that follows the last escape.  When no escape
   * was found and no prefix was written, buf stays empty and the template
   * itself is returned without copying it. */
  if (q != redirect  ||  buf[0] != '\0')
    strcat( buf, q );

  if (buf[0] == '\0')
    return redirect;

  return buf;
}


/*
 * Emergency mode: logs that it is entered, then writes an empty line to
 * stdout (and flushes it) for every line read from stdin.  When stdin is
 * exhausted this is logged and the process exits with status 1.
 * This function does not return.
 */
void sgEmergency( void )
{
  char line[MAX_BUF];

  ufdbLogError("going into emergency mode");

  for (;;)
  {
    if (fgets( line, MAX_BUF, stdin ) == NULL)
      break;

    puts("");
    fflush(stdout);
  }

  ufdbLogError("ending emergency mode, stdin empty");
  exit(1);
}


/*
 *  Converts yyyy.mm.ddTHH:MM:SS to seconds since the epoch.
 *
 *  '.' or '-' is accepted between the date fields and ':' or '-' between
 *  the time fields.  The broken-down time is handed to mktime() with
 *  tm_isdst set to 0.
 *
 *  Returns (time_t) -1 if the string does not contain all six fields or if
 *  a field is out of range (month 1-12, day 1-31, hour 0-23, minute and
 *  second 0-59, year not before 1900).
 */

time_t iso2sec( char * date )
{
  struct tm t;
  int y,m,d,H,M,S;

  /* all six fields must be converted, otherwise some of the ints are never set */
  if (sscanf(date,"%4d%*[.-]%2d%*[.-]%2d%*[T]%2d%*[:-]%2d%*[:-]%2d",
	     &y,&m,&d,&H,&M,&S) != 6)
    return (time_t) -1;

  m--; 
  y = y - 1900;
  if (y < 0 || m < 0 || m > 11 || d < 1 || d > 31 || H < 0 || H > 23 
     || M < 0 || M > 59 || S < 0 || S > 59)
    return (time_t) -1;

  /* a local struct: nothing is allocated, so nothing has to be freed */
  memset( &t, 0, sizeof(t) );
  t.tm_year = y;
  t.tm_mon =  m;
  t.tm_mday = d;
  t.tm_hour = H;
  t.tm_min = M;
  t.tm_sec = S;

  return (time_t) mktime(&t);
}

/*
 *  Converts yyyy.mm.dd to seconds since the epoch.
 *
 *  '.' or '-' is accepted between the fields.  The time of day is 00:00:00
 *  and the broken-down time is handed to mktime() with tm_isdst set to 0.
 *
 *  Returns (time_t) -1 if the string does not contain all three fields or
 *  if a field is out of range (month 1-12, day 1-31, year not before 1900).
 */
time_t date2sec( char * date )
{
  struct tm t;
  int y,m,d;

  /* all three fields must be converted, otherwise some of the ints are never set */
  if (sscanf(date,"%4d%*[.-]%2d%*[.-]%2d",&y,&m,&d) != 3)
    return (time_t) -1;

  m--; 
  y = y - 1900;
  if (y < 0 || m < 0 || m > 11 || d < 1 || d > 31)
    return (time_t) -1;

  /* a local struct: nothing is allocated, so nothing has to be freed */
  memset( &t, 0, sizeof(t) );
  t.tm_year = y;
  t.tm_mon =  m;
  t.tm_mday = d;

  return (time_t) mktime(&t);
}


/*
 * Resets a user quota record: status, time, last and consumed are all set
 * to 0.  Returns uq so that the call can be used in an expression.
 */
struct UserQuotaInfo * setuserquota( struct UserQuotaInfo * uq )
{
  uq->status   = 0; 
  uq->consumed = 0; 
  uq->last     = 0; 
  uq->time     = 0; 

  return uq;
}

