/*****************************************************************************
 * $Id: parser.l,v 1.23 2005/04/25 22:01:06 devel Exp $
 * Author:		Jim Brooks <tools@jimbrooks.org>
 * Start Date:	2003/09/28
 * Description:	C/C++ lint program.
 * License:		GNU GENERAL PUBLIC LICENSE (GPL)
 * Notes:
 *****************************************************************************/

%{

/* Scanner configuration macros; they must be defined before the generated */
/* scanner code that tests them. */
#define YY_NEVER_INTERACTIVE 1
#define YY_NO_UNPUT				/* shutup warning: `yyunput' defined but not used */
#define ECHO					/* don't echo unmatched patterns */

#include "y.tab.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include "parser.h"
#include "lwlint.h"
#include "str_heap.h"

/*
 * Current input line number, starting at 1.
 * Advanced by the "\n" rule and by the number of new-lines contained in a
 * multi-line lexeme (see LexToken() and SuppressionComment()).
 */
int lineLex	= 1;
/*
 * Value of lineLex at the moment the most recent token was passed to
 * LexToken(), i.e. before the new-lines inside that lexeme were counted.
 */
int lineLexPre;

/* Defined elsewhere.  LexToken() appends every returned lexeme to strHeap. */
extern STR_HEAP strHeap;
/* Defined elsewhere.  Set to TRUE by yywrap() when the input is exhausted. */
extern int eof;

/* Forward declarations: the rule actions call these before their definitions. */
static int
LexToken( int token );
static int
SuppressionComment( void );

%}

/*===========================================================================*/

/*****************************************************************************
 * These regular expressions were written and given to me by Seven of Nine.
 * I stared and told her, "Hey, this isn't your real phone number!".
 * #7 replied, "Moron, it means to match up to one or two lines that contain
 * several groups of C types or whitespace, the last line must contain
 * a left parenthesis, the bounding parentheses can contain a few lines of
 * identifiers and array/pointer/reference operators, but no parentheses
 * nor brackets, the right parenthesis can be followed by whitespace or nothing,
 * and opening/closing braces of the function definition must start on new lines.
 * It must adapt itself to pointer/array/reference syntax (*,[],&).
 * Object-oriented programming is irrelevant (ignore C++ scope operator "::")."
 * #7 added, "And you can forget about being assimilated".
 *****************************************************************************/

/*****************************************************************************
 * Notes:
 * - "." means any char except newline.
 * - AFAIK "^" can only be used once in a pattern or flex will complain vaguely.
 * - flex -i (case-insensitive) is used, so a-zA-Z is unnecessary.
 * - The ".*" at end is to scoop any suppression comments (eg //@lint)
 *   into yytext, which is then scanned by strstr() to find a suppression comment.
 * - Example of a dangling "else" (indentation is deceptive):
 *   Correction is to put braces around the second "if" statement.
 *    if ( inputReady )
 *        if ( file ) ProcessInput();
 *    else
 *       fprintf( stderr, "ERROR: input not ready.\n" );
 *****************************************************************************/

/* A literal asterisk. */
STAR				\*
/* Zero or more spaces/tabs (may match nothing). */
WS					[ \t]*
/* A single new-line. */
NL					\n
/* An identifier of 1 to 21 characters; longer identifiers are not matched in full. */
NAME				[a-z_][a-z_0-9]{0,20}
/* A type name is matched the same way as any identifier. */
TYPE				{NAME}
/* A C comment that opens and closes on the same line ("." never matches a new-line). */
COMMENTC            \/\*.*\*\/
/* A C++ comment: two slashes through the end of the line. */
COMMENTCPP          \/\/.*
/* Either kind of comment. */
COM					{COMMENTC}|{COMMENTCPP}
/* A line holding only a comment (optionally surrounded by blanks), including its new-line. */
CL					{WS}{COM}{WS}\n
/* Optional blanks with an optional comment in between. */
WSCOM				{WS}{COM}?{WS}
/* One or more characters other than braces and semicolon. */
/* A negated class also matches new-lines, so TEXT can span lines. */
TEXT				[^{};]+
/* TEXT terminated by a semicolon. */
STMT				{TEXT};
/* A brace block without nested braces that spans 1 to 5 new-lines; */
/* the closing brace must sit on its own line, preceded by at least one blank. */
BLOCK				\{([^{}\n]*\n){1,5}[ \t]+\}
/* An indented "if" (at least one leading blank, at most one space before */
/* the parenthesis), matched through the last ")" on the line. */
IF					[ \t]+if[ ]?\(.*\)
/* Same as IF but for "else if", written with exactly one space between the words. */
ELSE_IF				[ \t]+else[ ]if[ ]?\(.*\)
/* "if (...)" whose single statement starts on the following line. */
IF_STMT				{IF}{WSCOM}{NL}{WSCOM}{STMT}
/* "if (...)" followed by a BLOCK on the same or on the following line. */
IF_BLOCK			{IF}{WSCOM}{NL}?{WSCOM}{BLOCK}
/* The "else if" counterparts of IF_STMT and IF_BLOCK. */
ELSE_IF_STMT		{ELSE_IF}{WSCOM}{NL}{WSCOM}{STMT}
ELSE_IF_BLOCK		{ELSE_IF}{WSCOM}{NL}?{WSCOM}{BLOCK}
/* A zero followed by one or more digits 1-9.  The digits 8 and 9 are */
/* accepted on purpose-or-not; a zero directly after the leading zero is not. */
OCTAL				0[1-9]+

%%

	/* Ignore preprocessor statements, esp. #if/#else */
	/* Only the directive's own line is consumed ("." stops at the new-line), */
	/* so the new-line itself is still seen by the line-counting "\n" rule. */
^{WS}\#.*			{ ; }

	/* Check for assignment within "if": if ( x = 1 ) */
	/* Not to be confused by NE/GE/LE: if ( x != 1 ) nor if ( x >= y ) */
	/* But a parenthesized assignment is OK by convention: "if ( (x=1) )" */
	/* The condition may not contain nested parentheses, braces or ";", */
	/* which is what lets the parenthesized form pass unreported. */
	/* The trailing ".*" pulls the rest of the line into yytext so that */
	/* SuppressionComment() can find a suppression comment there. */
^{WS}if{WS}\([^(){};]*[^!<>=(){};]=[^=(){};]+\).* {
						 if (!SuppressionComment()) return LexToken( TOKEN_IF_ASSIGN ); }

	/* Bad bit test: "if ( reg & bit == mask )" */
	/* Should be:    "if ( (reg & bit) == mask )" */
	/* In C "==" binds tighter than "&", so the first form compares before masking. */
	/* Both operands of "&" must be plain {NAME}s; the right-hand side of "==" */
	/* is a {NAME} or anything starting with a digit. */
^{WS}if{WS}\({WS}{NAME}{WS}&{WS}{NAME}{WS}=={WS}({NAME}|[0-9]).* {
						 if (!SuppressionComment()) return LexToken( TOKEN_BAD_BIT_TEST ); }

	/* Identify dangling "else". */
	/* Don't match semicolon nor braces between two "if"s. */
	/* Disabled earlier form of the rule: */
	/*[^#]if[ ]?\([^;{}\n]*\n?[^;{}\n]*if.*\(	{ return LexToken( TOKEN_IF_IF ); }*/
	/* Shape matched: an "if", optionally a new-line, a second "if" with a */
	/* single statement ending in ";", then a line that starts with "else". */
	/* {IF} itself demands a leading blank, so both "if"s must be indented. */
^{WS}{IF}{WS}{NL}?{WS}{IF}{TEXT};{WS}{NL}{WS}else.* {
	if (!SuppressionComment()) return LexToken( TOKEN_DANGLING_ELSE ); }

	/* Catches omission of "else if" in a series:	*/
	/*												*/
	/*	if ( strstr( argv[i], "-W1" ) {				*/
	/*	}											*/
	/* 	if ( strstr( argv[i], "-W2" ) {		// oops	*/
	/*	}											*/
	/*	else if ( strstr( argv[i], "-W3" ) {		*/
	/*	}											*/
	/*	else if ( strstr( argv[i], "-W4" ) {		*/
	/*	}											*/
	/* Each rule looks at three consecutive conditionals; one comment-only */
	/* line ({CL}) is tolerated between them. */
	/* NOTE(review): these patterns end at the last ")" of the final line, so a */
	/* suppression comment placed after it is not part of yytext; only "@lint" */
	/* found earlier in the matched text suppresses the report - confirm intent. */
	/* Braced series: if, if, else-if */
^{IF_BLOCK}{WSCOM}{NL}{CL}?{WS}{IF_BLOCK}{WSCOM}{NL}{CL}?{WS}{ELSE_IF} {
	if (!SuppressionComment()) return LexToken( TOKEN_BAD_ELSE_IF ); }
	/* Braced series: if, else-if, if */
^{IF_BLOCK}{WSCOM}{NL}{CL}?{WS}{ELSE_IF_BLOCK}{WSCOM}{NL}{CL}?{WS}{IF} {
	if (!SuppressionComment()) return LexToken( TOKEN_BAD_ELSE_IF ); }
	/* Braced series: else-if, else-if, if */
^{ELSE_IF_BLOCK}{WSCOM}{NL}{CL}?{WS}{ELSE_IF_BLOCK}{WSCOM}{NL}{CL}?{WS}{IF} {
	if (!SuppressionComment()) return LexToken( TOKEN_BAD_ELSE_IF ); }
	/* Below is similar to the above but is for non-braces. */
	/* Usually a series is written with all-braces or no-braces. */
	/* Same three orderings, each conditional followed by a single statement. */
^{IF_STMT}{WSCOM}{NL}{CL}?{WS}{IF_STMT}{WSCOM}{NL}{CL}?{WS}{ELSE_IF} {
	if (!SuppressionComment()) return LexToken( TOKEN_BAD_ELSE_IF ); }
^{IF_STMT}{WSCOM}{NL}{CL}?{WS}{ELSE_IF_STMT}{WSCOM}{NL}{CL}?{WS}{IF} {
	if (!SuppressionComment()) return LexToken( TOKEN_BAD_ELSE_IF ); }
^{ELSE_IF_STMT}{WSCOM}{NL}{CL}?{WS}{ELSE_IF_STMT}{WSCOM}{NL}{CL}?{WS}{IF} {
	if (!SuppressionComment()) return LexToken( TOKEN_BAD_ELSE_IF ); }

	/* Sometimes a decimal constant is written with a leading zero. */
	/* But that is treated as an octal constant in C. */
	/* Three contexts are checked, in this order: right after "=", as the */
	/* first thing inside "(", and after a "," inside parentheses. */
	/* Each pattern scoops the rest of the line for SuppressionComment(). */
={WS}{OCTAL}.*			{ if (!SuppressionComment()) return LexToken( TOKEN_OCTAL_RVALUE ); }
\({WS}{OCTAL}.*\).*		{ if (!SuppressionComment()) return LexToken( TOKEN_OCTAL_ARG ); }
\(.*,{WS}{OCTAL}.*\).*	{ if (!SuppressionComment()) return LexToken( TOKEN_OCTAL_ARG ); }

	/* Catch mixing the declarations of variables of pointers-to-T and T. */
	/* This check is disabled by default because it is a popular programming style */
	/* and the compiler will usually catch it anyway. */
	/* But a rare problem can appear caused by bad casting: */
	/*     char *input, output;   // trouble is coming   */
	/*     char* input, output;   // trouble too         */
    /*     output = FALSE;                               */
    /*     ...                                           */
	/*     input = (char*)output;                        */
	/* First form: a starred name directly followed by "," and an unstarred name. */
^{WS}{TYPE}[ \ta-z_0-9,*]*{STAR}{WS}{NAME},{WS}{NAME}[,;].* {
						 if (!SuppressionComment()) return LexToken( TOKEN_PTR_NONPTR_DECL ); }
	/*     char output, *input;   // trouble too         */
	/* Second form: an unstarred name first, a starred name later in the list */
	/* (no blank is allowed between the star and that name). */
^{WS}{TYPE}[ \t]+{NAME},[ \ta-z_0-9,*]*{STAR}{NAME}[,;].* {
						 if (!SuppressionComment()) return LexToken( TOKEN_PTR_NONPTR_DECL ); }

	/* "if (flag==true)" and its accomplices are no-nos. */
	/* Matches "== true" / "!= true" (resp. false) as the last comparison */
	/* before the first ")" after the "if". */
	/* NOTE(review): unlike the other checks these patterns carry no trailing */
	/* ".*", so a suppression comment after the ")" never reaches yytext and */
	/* cannot suppress the report - confirm whether that is intended. */
if{WS}\([^)]*[=!]={WS}true{WS}\)	{ if (!SuppressionComment()) return LexToken( TOKEN_TEST_TRUE );  }
if{WS}\([^)]*[=!]={WS}false{WS}\)	{ if (!SuppressionComment()) return LexToken( TOKEN_TEST_FALSE ); }

	/* Braces are always handed to the parser as tokens. */
\{						{ return LexToken( TOKEN_LEFT_BRACE ); }
\}						{ return LexToken( TOKEN_RIGHT_BRACE ); }
	/* A new-line not swallowed by a longer pattern advances the line counter. */
\n						{ ++lineLex; }
	/* Every other character is silently discarded. */
.						{ ; }

%%

/*===========================================================================*/

/*----------------------------------------------------------------------------
 * public functions
 *---------------------------------------------------------------------------*/

/*****************************************************************************
 * What to do if end of input is reached.
 * Raises the global "eof" flag and reports that no further input follows.
 * Returns	: Always 1 (non-zero tells the scanner to stop).
 *****************************************************************************/
int
yywrap()
{
	/*
	 * Stop.
	 */
	eof = TRUE;
	return 1;
}

/*****************************************************************************
 * Tell lex to use a buffer.
 * Parms	: pBuf
 *			  NUL-terminated text to scan.
 * Returns	: Buffer's handle (really a pointer to YY_BUFFER_STATE).
 *			  Pass it to LexBufferDiscard() when scanning is finished.
 * Notes	: lineLex is not reset here.
 *****************************************************************************/
void*
LexBufferUse( char* pBuf )
{
	/* NOTE(review): yyrestart() may allocate a default buffer when none is */
	/* current; that buffer is not released by this file - confirm. */
	yyrestart( NULL );
	return (void*) yy_scan_string( pBuf );
}

/*****************************************************************************
 * Discard buffer.
 * Parms	: hnd
 *			  Pass the handle returned by LexBufferUse().
 *****************************************************************************/
void
LexBufferDiscard( void* hnd )
{
	/* The handle is a YY_BUFFER_STATE disguised as void* by LexBufferUse(). */
	yy_delete_buffer( (YY_BUFFER_STATE) hnd );
}

/*----------------------------------------------------------------------------
 * internal functions
 *---------------------------------------------------------------------------*/

/*****************************************************************************
 * Tally the new-line characters in a string.
 * Parms	: pc
 *			  NUL-terminated string to examine (NULL is tolerated).
 * Returns	: Number of '\n' chars found; 0 for a NULL or empty string.
 *****************************************************************************/
static int
CountLines( const char* pc )
{
	int newlines = 0;

	if ( pc == NULL )
	{
		return 0;
	}

	while ( *pc != '\0' )
	{
		if ( *pc++ == '\n' )
		{
			++newlines;
		}
	}

	return newlines;
}

/*****************************************************************************
 * Hand a matched lexeme to the parser.
 * Parms	: token
 *			  Token code to report for the text currently in yytext.
 * Returns	: The same token code, unchanged.
 * Effects	: lineLexPre receives the line number in effect before this
 *			  lexeme's new-lines are counted, yylval.str receives the lexeme
 *			  as stored in strHeap, and lineLex moves past any new-lines the
 *			  lexeme contains.
 *****************************************************************************/
static int
LexToken( int token )
{
	char* lexeme = yytext;

	/* Snapshot the line counter before this lexeme's new-lines are added. */
	lineLexPre = lineLex;

	/*
	 * yacc reads yylval to obtain the value of $<str>$ for a returned token,
	 * so store the lexeme in the string heap and publish the result.
	 */
	yylval.str = StrHeapAppendFast( &strHeap, lexeme, strlen( lexeme ) );

	/* Patterns may scoop several lines at once; account for them. */
	lineLex += CountLines( lexeme );

	return token;
}

/*****************************************************************************
 * Tell whether the current lexeme carries a lint-suppression marker.
 * Returns	: TRUE if yytext contains "@lint" (exact case), else FALSE.
 * Effects	: On TRUE the rule action skips LexToken(), so the new-lines in
 *			  the lexeme are added to lineLex here instead.
 *****************************************************************************/
static int
SuppressionComment( void )
{
	if ( strstr( yytext, "@lint" ) == NULL )
	{
		return FALSE;
	}

	/* Suppressed: keep the line counter in step with the skipped text. */
	lineLex += CountLines( yytext );
	return TRUE;
}
