/*
 * ufdb-api.c - URLfilterDB
 *
 * ufdbGuard API is copyrighted (C) 2005,2006,2007 by URLfilterDB with all rights reserved.
 *
 * ufdbGuard API is used to integrate the functionality of ufdbGuard into
 * programs of 3rd parties.
 *
 * RCS $Id: ufdb-api.c,v 1.16 2007/11/09 21:22:43 root Exp root $
 */

#include "ufdb.h"
#include "ufdb-api.h"

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

/*
 *  A 3rd party typically keeps an administration of the categories in use:
 *  for each category its name, its associated file names and a category
 *  handle (UFDBcategory).
 *  We assume this global data structure:
 */

/******************************************************
struct usedCategory_s
{
   char * name;
   char * dbfile;
   char * exprfile;
   UFDBcategory handle;
};

static struct usedCategory_s usedCategory[22];
static int numUsedCategories;
static UFDBthreadAdmin * admin;
*******************************************************/

/* 
 *  The UFDB library uses mutexes to protect data structures in
 *  multithreaded applications.
 *  When the API is used in a single-threaded application (built with
 *  UFDB_API_NO_THREADS), stubs must be provided for the following
 *  pthread functions:
 *  pthread_mutex_lock
 *  pthread_mutex_unlock
 */

#ifdef UFDB_API_NO_THREADS

/*
 * Stub for single-threaded builds: there is nothing to lock, so the mutex
 * is not touched and success (0) is always reported.
 */
int pthread_mutex_lock( pthread_mutex_t * mutex )
{
   (void) mutex;		/* intentionally unused */

   return 0;
}

/*
 * Stub for single-threaded builds: there is nothing to unlock, so the mutex
 * is not touched and success (0) is always reported.
 */
int pthread_mutex_unlock( pthread_mutex_t * mutex )
{
   (void) mutex;		/* intentionally unused */

   return 0;
}

#endif


/* 
 * Load a category: the database from dbfile and, when exprfile is given,
 * the expressions from exprfile.
 *
 * Only a failure to load the database is reported to the caller; in that
 * case the expressions are not touched.  The result of loading the
 * expressions is deliberately not checked.
 */
int UFDBloadCategory( 		/* return value: UFDB_API_OK on success, else the UFDBloadDatabase() error */
   char *         dbfile, 	/* file name of .ufdb file */
   char *         exprfile, 	/* file name of expressions file, may be NULL */
   UFDBcategory * category ) 	/* category administration must be allocated by the caller */
{
   int status = UFDBloadDatabase( &category->c_table, dbfile );

   if (status == UFDB_API_OK)
   {
      /* start without expressions; the optional file may fill them in */
      category->c_expressions = NULL;
      if (exprfile != NULL)
         (void) UFDBloadExpressions( &category->c_expressions, exprfile );
   }

   return status;
}


/*
 * Unload a category: release the table index, the memory block of the
 * database and the list of expressions.
 *
 * A NULL category is ignored.  The pointers that were handed to ufdbFree()
 * and ufdbFreeRegExprList() are set to NULL afterwards, so the category
 * administration does not keep dangling pointers to released memory.
 */
void UFDBunloadCategory( 
   UFDBcategory * category )
{
   if (category == NULL)
      return;

   UFDBfreeTableIndex( &(category->c_table.table) );
   ufdbFree( category->c_table.mem );
   category->c_table.mem = NULL;		/* freed above; do not leave it dangling */

   ufdbFreeRegExprList( category->c_expressions );
   category->c_expressions = NULL;		/* same state as after a load without expressions */
}


/*
 * Strip a URL:
 * - remove everything from the first '?' or '&' (unless UFDB_KEEP_FULL_URL
 *   is defined); this truncates the caller's URL string in place,
 * - remove the protocol prefix (e.g. http://) and return it in protocol,
 * - remove username and password (user:password@),
 * - remove a www[0-9]*. or ftp[0-9]*. prefix,
 * - remove the port number and return it in portnumber,
 * - convert characters to lower case,
 * - convert hex codes (%61 = 'a') to characters; codes up to %20 and
 *   malformed codes are copied unchanged,
 * - substitute // by / in a URL,
 * - substitute /./ by / in a URL.
 *
 * domain receives the host name as it appears in the URL: without user,
 * password and port, but still with a www/ftp prefix and in its original
 * case.  protocol is "http" when the URL has no "://".  portnumber is 80,
 * or 443 for https, unless the URL has an explicit :port (a ':' without
 * digits yields 0; a value that does not fit in an int yields INT_MAX).
 *
 * All outputs are bounded: the stripped URL is cut at 1000 characters,
 * the domain at 1023 characters and the protocol at 15 characters.
 */
void UFDBstripURL( 
   char * URL, 			/* input URL string (truncated in place at '?' or '&') */
   char * strippedUrl,  	/* output char array (must be 1024 bytes) */
   char * domain,       	/* output char array (must be 1024 bytes) */
   char * protocol,		/* output char array (must be 16 bytes) */
   int  * portnumber )		/* output integer */
{
   enum { maxUrlLen = 1000, maxDomainLen = 1023, maxProtocolLen = 15 };

   char * p;
   char * tmp;
   char * out;
   char * domain_start;
   char * domain_end;
   char * user_end;
   char * port_sep;
   size_t n;
   size_t len;
   int    port;
   int    i;
   char   buffer[maxUrlLen + 1];

   *portnumber = 80;

#ifndef UFDB_KEEP_FULL_URL
   /* remove the part after a '?' or '&'.  We are not interested in it.
    * Only write when there is something to cut off.
    */
   p = URL + strcspn( URL, "?&" );
   if (*p != '\0')
      *p = '\0';
#endif

   /* strip http: and ftp: protocol header */
   p = strstr( URL, "://" );
   if (p != NULL)
   {
      n = (size_t) (p - URL);
      if (n > (size_t) maxProtocolLen)		/* a very large protocol name: truncate it */
         n = (size_t) maxProtocolLen;
      memcpy( protocol, URL, n );
      protocol[n] = '\0';
      if (n == 5  &&  strcasecmp( protocol, "https" ) == 0)
         *portnumber = 443;
      p += 3;
   }
   else 
   {
      p = URL;
      strcpy( protocol, "http" );
   }

   domain_end = strchr( p, '/' );

   /* strip user:password@ but only when the '@' is part of the host section */
   user_end = strchr( p, '@' );
   if (user_end != NULL  &&  (domain_end == NULL  ||  user_end < domain_end))
      p = user_end + 1;

   domain_start = p;

   /* strip www[0-9]*. and ftp[0-9]*. but only if a second '.' follows */
   if ((p[0] == 'w' && p[1] == 'w' && p[2] == 'w') ||
       (p[0] == 'f' && p[1] == 't' && p[2] == 'p'))
   {
      tmp = p + 3;
      while (*tmp >= '0'  &&  *tmp <= '9')
         tmp++;
      if (*tmp == '.'  &&  strchr( tmp+1, '.' ) != NULL)
         p = tmp + 1;
   }

   /*
    * A ':' only separates host and port when it occurs before the first '/'.
    * A ':' inside the path must neither be parsed as a port nor be used as
    * the end of the domain name.
    */
   port_sep = strchr( p, ':' );
   if (port_sep != NULL  &&  domain_end != NULL  &&  port_sep > domain_end)
      port_sep = NULL;

   /* build the temporary URL in buffer, never writing more than maxUrlLen characters */
   len = 0;
   if (port_sep != NULL)
   {
      n = (size_t) (port_sep - p);		/* copy domain name */
      if (n > (size_t) maxUrlLen)
         n = (size_t) maxUrlLen;
      memcpy( buffer, p, n );
      len = n;

      p = port_sep + 1;				/* skip :portnum */
      port = 0;
      while (*p >= '0'  &&  *p <= '9')
      {
         int digit = *p - '0';

         if (port > (INT_MAX - digit) / 10)	/* saturate instead of overflowing */
            port = INT_MAX;
         else
            port = port * 10 + digit;
         p++;
      }
      *portnumber = port;
   }

   n = strlen( p );				/* copy rest of the URL */
   if (n > (size_t) maxUrlLen - len)
      n = (size_t) maxUrlLen - len;
   memcpy( buffer + len, p, n );
   buffer[len + n] = '\0';

   /* save the original domainname: it ends at the port separator or at the first '/' */
   if (port_sep != NULL)
      domain_end = port_sep;
   if (domain_end != NULL)
      n = (size_t) (domain_end - domain_start);
   else
      n = strlen( domain_start );
   if (n > (size_t) maxDomainLen)
      n = (size_t) maxDomainLen;
   memcpy( domain, domain_start, n );
   domain[n] = '\0';

   /*
    * now a temporary URL is in the buffer: convert it to lower case.
    * This also makes all hex digits of %xx codes lower case.
    */
   for (tmp = buffer;  *tmp != '\0';  tmp++)
   {
      if (*tmp >= 'A'  &&  *tmp <= 'Z')
         *tmp += 'a' - 'A';
   }

   /*
    *  Copy the buffer to strippedUrl, while converting hex codes to characters.
    */
   out = strippedUrl;
   p = buffer;
   while (*p != '\0')
   {
      if (*p == '%')				/* start of a HEX code */
      {
         int hex = -1;				/* -1: not a valid code */

         /* p[2] is only inspected when p[1] is a hex digit, so the terminator is never passed */
         if (isxdigit( (unsigned char) p[1] )  &&  isxdigit( (unsigned char) p[2] ))
         {
            hex  = ((p[1] <= '9') ? p[1] - '0' : p[1] - 'a' + 10) * 16;
            hex += (p[2] <= '9') ? p[2] - '0' : p[2] - 'a' + 10;
         }

         if (hex > 0x20)
         {
            if (hex >= 'A'  &&  hex <= 'Z')
               hex += 'a' - 'A';
            *out++ = (char) hex;
            p += 3;
         }
         else
         {
            /*
             * control character, space or erroneous code: copy the '%' and
             * the next two characters unchanged, but stop at the end of
             * the URL when the '%' is (almost) the last character.
             */
            for (i = 0;  i < 3  &&  *p != '\0';  i++)
               *out++ = *p++;
         }
      }
      else					/* plain character */
      {
         while (*p == '/')
         {
            if (p[1] == '/')				/* substitute // by / */
               p++;
            else if (p[1] == '.'  &&  p[2] == '/')	/* substitute /./ by / */
               p += 2;
            else
               break;
         }
         *out++ = *p++;
      }
   }
   *out = '\0';
}


/* 
 * Perform a lookup of a URL in a category: first the reversed URL is looked
 * up in the table of the category and, only when that lookup yields 0, the
 * stripped URL is matched against the expressions of the category.
 * The first non-zero result is returned as is.
 */
int UFDBverifyUrlCategory(		/* return value: 0 means URL is not in the category */
   char *             strippedUrl,	/* stripped URL, e.g. asite.com/cgi-bin/x.pl?a=0 */
   UFDBrevURL *       revUrl,       	/* stripped URL in reverse order */
   UFDBusedCategory * category ) 	/* category handle */
{
   int found;

   found = UFDBlookupRevUrl( &category->handle.c_table.table.nextLevel[0], revUrl );
   if (found == 0)
      found = ufdbRegExpMatch( category->handle.c_expressions, strippedUrl );

   return (found != 0) ? found : UFDB_API_OK;
}


/*
 * Stub: this implementation has no settings, so NULL is returned for
 * every argument.
 */
char * ufdbSettingGetValue( char * p )
{ 
   (void) p;			/* intentionally unused */

   return NULL;
}

