// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

#include "Parser.H"
#include "token.H"
#include "MessageArg.H"

// Read a processing instruction in piMode up to the tokenPic token and
// deliver its text to the event handler as an ImmediatePiEvent.
//
// Returns 0 if an entity end is seen before tokenPic, or if the collected
// text grows beyond twice syntax().pilen() (both are reported via message());
// returns 1 once the event has been delivered.
Boolean Parser::parseProcessingInstruction()
{
  currentInput()->startToken();
  Location startLoc(currentLocation());
  CString data;
  for (Token tok = getToken(piMode); tok != tokenPic; tok = getToken(piMode)) {
    switch (tok) {
    case tokenEe:
      message(Messages::processingInstructionEntityEnd);
      return 0;
    case tokenUnrecognized:
      reportNonSgmlCharacter();
      // fall through: the character is still kept as part of the text
    case tokenChar:
      data += *currentInput()->currentTokenStart();
      // Give up once the text is more than twice the permitted length.
      if (data.length()/2 > syntax().pilen()) {
        message(Messages::processingInstructionLength,
                NumberMessageArg(syntax().pilen()));
        message(Messages::processingInstructionClose);
        return 0;
      }
      break;
    }
  }
  // An over-length instruction that was properly closed is reported,
  // but the event is still generated.
  if (data.length() > syntax().pilen())
    message(Messages::processingInstructionLength,
            NumberMessageArg(syntax().pilen()));
  noteMarkup();
  eventHandler().pi(new (eventAllocator()) ImmediatePiEvent(data, startLoc));
  return 1;
}

// Parse a literal into `text`, reading tokens until tokenLit or tokenLita.
//
//   litMode        - mode used for getToken() while at the input level at
//                    which parsing started
//   liteMode       - mode used for getToken() while inside an entity that
//                    was referenced from within the literal
//   maxLength      - length limit; exceeding it is reported with
//                    tooLongMessage
//   tooLongMessage - message issued (with maxLength as its argument) when
//                    the literal is too long
//   flags          - tested here for literalSingleSpace and literalDataTag
//   text           - cleared on entry, then filled with the literal's content
//
// Returns 0 when parsing is abandoned (entity end at the starting level,
// a suspected missing closing delimiter, or a failed character/entity
// reference); returns 1 when the closing token has been reached.
Boolean Parser::parseLiteral(Mode litMode,
			     Mode liteMode,
			     size_t maxLength,
			     Messages::Type1 tooLongMessage,
			     unsigned flags,
			     Text &text)
{
  // Input level at which the literal began; used to tell whether we are
  // currently inside an entity opened from within the literal.
  unsigned startLevel = inputLevel();
  Mode currentMode = litMode;
  // If the literal gets to be longer than this, then we assume
  // that the closing delimiter has been omitted if we're at the end
  // of a line and at the starting input level.
  // (Twice maxLength, saturating at size_t(-1) to avoid overflow.)
  size_t reallyMaxLength = (maxLength > size_t(-1)/2
			    ? size_t(-1)
			    : maxLength * 2);
  text.clear();
  for (;;) {
    Token token = getToken(currentMode);
    switch (token) {
    case tokenEe:
      // Entity end at the starting level: the literal was never closed.
      if (inputLevel() == startLevel) {
	message(Messages::literalLevel);
	return 0;
      }
      // Otherwise an entity referenced inside the literal has ended.
      text.addEntityEnd(currentLocation());
      popInputStack();
      // Back at the starting level: resume using the outer mode.
      if (inputLevel() == startLevel)
	currentMode = litMode;
      break;
    case tokenUnrecognized:
      // If it was reported as a non-SGML character nothing more is needed;
      // otherwise report it with literalMinimumData.
      if (reportNonSgmlCharacter())
	break;
      message(Messages::literalMinimumData,
	      StringMessageArg(currentToken()));
      break;
    case tokenRs:
      // Recorded as ignored rather than added as data.
      text.ignoreChar(currentChar(), currentLocation());
      break;
    case tokenRe:
      if (text.length() > reallyMaxLength && inputLevel() == startLevel) {
	message(tooLongMessage, NumberMessageArg(maxLength));
	// guess that the closing delimiter has been omitted
	message(Messages::literalClosingDelimiter);
	return 0;
      }
      // fall through
    case tokenSepchar:
      // With literalSingleSpace, a leading space or one following another
      // space is ignored; otherwise the character is replaced by
      // syntax().space(), with a ReplacementOrigin recording the original.
      if ((flags & literalSingleSpace)
	  && (text.length() == 0 || text.lastChar() == syntax().space()))
	text.ignoreChar(currentChar(), currentLocation());
      else
	text.addChar(syntax().space(),
		     Location(new ReplacementOrigin(currentLocation(),
						    currentChar()),
			      0));
      break;
    case tokenSpace:
      // Same collapsing rule as above, but the character is added as is.
      if ((flags & literalSingleSpace)
	  && (text.length() == 0 || text.lastChar() == syntax().space()))
	text.ignoreChar(currentChar(), currentLocation());
      else
	text.addChar(currentChar(), currentLocation());
      break;
    case tokenCroDigit:
      {
	// Numeric character reference.
	Char c;
	Location loc;
	if (!parseNumericCharRef(c, loc))
	  return 0;
	// With literalDataTag set, non-SGML and function characters are
	// reported (but still processed below).
	if (flags & literalDataTag) {
	  if (!syntax().isSgmlChar(c))
	    message(Messages::dataTagPatternNonSgml);
	  else if (syntax().charSet(Syntax::functionChar)->contains(c))
	    message(Messages::dataTagPatternFunction);
	}
	// Apply the single-space rule to a referenced space as well.
	if ((flags & literalSingleSpace)
	    && c == syntax().space()
	    && (text.length() == 0 || text.lastChar() == syntax().space()))
	  text.ignoreChar(c, loc);
	else
	  text.addChar(c, loc);
      }
      break;
    case tokenCroNameStart:
      // Named character reference.
      if (!parseNamedCharRef())
	return 0;
      break;
    case tokenEroGrpo:
      // Reported as an error; the message depends on inInstance().
      message(inInstance() ? Messages::eroGrpoStartTag : Messages::eroGrpoProlog);
      break;
    case tokenLit:
    case tokenLita:
      // Closing token: leave the loop.
      goto done;
    case tokenEroNameStart:
    case tokenPeroNameStart:
      {
	ConstResourcePointer<Entity> entity;
	ResourcePointer<EntityOrigin> origin;
	if (!parseEntityReference(token == tokenPeroNameStart, 0, entity, origin))
	  return 0;
	if (!entity.isNull())
	  entity->litReference(text, *this, origin,
			       (flags & literalSingleSpace) != 0);
	// If the input level is now deeper than where we started,
	// switch to the mode used inside entities.
	if (inputLevel() > startLevel)
	  currentMode = liteMode;
      }
      break;
    case tokenPeroGrpo:
      message(Messages::peroGrpoProlog);
      break;
    case tokenChar:
      // Same missing-delimiter heuristic as for tokenRe, for an RE
      // character that arrives as an ordinary character token.
      if (text.length() > reallyMaxLength && inputLevel() == startLevel
	  && currentChar() == syntax().standardFunction(Syntax::fRE)) {
	message(tooLongMessage, NumberMessageArg(maxLength));
	// guess that the closing delimiter has been omitted
	message(Messages::literalClosingDelimiter);
	return 0;
      }
      text.addChar(currentChar(), currentLocation());
      break;
    }
  }
 done:
  // With literalSingleSpace, drop a trailing space.
  if ((flags & literalSingleSpace)
      && text.length() > 0
      && text.lastChar() == syntax().space())
    text.ignoreLastChar();
  // An over-length but properly closed literal is reported, not rejected.
  if (text.length() > maxLength)
    message(tooLongMessage, NumberMessageArg(maxLength));
  return 1;
}

// Parse a named character reference: extend the current token to the full
// name, look it up with syntax().lookupFunctionChar(), read the following
// token in refMode, and, if the name was known, push the character back
// onto the input as a character reference.
//
// An unknown name is reported with Messages::functionName.
// Always returns 1.
Boolean Parser::parseNamedCharRef()
{
  InputSource *input = currentInput();
  Index refStart = currentLocation().index();
  input->discardInitial();
  extendNameToken(syntax().namelen(), Messages::nameLength);

  CString functionName;
  getCurrentToken(syntax().generalSubstTable(), functionName);

  Char functionChar;
  Boolean known = 1;
  if (!syntax().lookupFunctionChar(functionName, &functionChar)) {
    message(Messages::functionName, StringMessageArg(functionName));
    known = 0;
  }

  // Read the token that follows the name in refMode; its value is not needed.
  (void)getToken(refMode);
  input->startToken();
  if (known)
    input->pushCharRef(functionChar, NamedCharRef(refStart));
  return 1;
}

// Parse a numeric character reference.
//
// The current token is extended over the digits, which are accumulated
// into a character number using sd().digitWeight().  If the number does
// not fit in a Char, Messages::characterNumber is issued.
//
// On success `ch` receives the character number, `loc` receives a location
// backed by a NumericCharRefOrigin, and 1 is returned.  On overflow 0 is
// returned and `ch` and `loc` are left untouched.  The token following the
// digits is read in refMode in both cases.
Boolean Parser::parseNumericCharRef(Char &ch, Location &loc)
{
  InputSource *input = currentInput();
  Location refStart(currentLocation());
  input->discardInitial();
  extendNumber(syntax().namelen(), Messages::numberLength);

  Char number = 0;
  Boolean fits = 1;
  const Char *end = input->currentTokenEnd();
  for (const Char *digit = input->currentTokenStart();
       fits && digit < end;
       digit++) {
    int weight = sd().digitWeight(*digit);
    // Check before multiplying, then before adding, so that the
    // accumulated value never wraps around.
    if (number > Char(-1)/10)
      fits = 0;
    else {
      number *= 10;
      if (number > Char(-1) - weight)
        fits = 0;
      else
        number += weight;
    }
    if (!fits)
      message(Messages::characterNumber, StringMessageArg(currentToken()));
  }

  // Read the token that follows the digits in refMode; its value is not needed.
  (void)getToken(refMode);
  if (!fits)
    return 0;
  ch = number;
  loc = Location(new NumericCharRefOrigin(refStart,
                                          currentLocation().index()), 0);
  return 1;
}

// Parse an entity reference.
//
// On entry the current token is pero or ero + name start character; when
// hasNameGroup is set, a name group is parsed first via
// parseEntityReferenceNameGroup() and must be followed by a name start
// character.
//
//   isParameter  - nonzero for a parameter entity reference; selects the
//                  name length limit, the lookup, and the messages used
//   hasNameGroup - nonzero if the reference carries a name group
//   entity       - out: the referenced entity, or null if the reference is
//                  inactive or the entity could not be found
//   origin       - out: a new EntityOrigin when `entity` is non-null,
//                  otherwise null
//
// Returns 0 if the name group could not be parsed or is not followed by a
// name; returns 1 otherwise (including when the entity is undefined, which
// is only reported via message()).
Boolean Parser::parseEntityReference(Boolean isParameter,
                                     Boolean hasNameGroup,
                                     ConstResourcePointer<Entity> &entity,
                                     ResourcePointer<EntityOrigin> &origin)
{
  InputSource *in = currentInput();
  Location startLocation = in->currentLocation();
  Boolean active;
  if (hasNameGroup) {
    if (!parseEntityReferenceNameGroup(active))
      return 0;
    in->startToken();
    Xchar c = in->tokenChar(inputContext());
    if (!syntax().isNameStartCharacter(c)) {
      message(Messages::entityReferenceMissingName);
      return 0;
    }
  }
  else
    active = 1;
  in->discardInitial();
  if (isParameter)
    extendNameToken(syntax().penamelen(), Messages::parameterEntityNameLength);
  else
    extendNameToken(syntax().namelen(), Messages::nameLength);
  if (!active) {
    // Inactive reference: hand back null for both out-parameters so the
    // caller never sees a stale origin, matching the undefined-entity path.
    entity = 0;
    origin = 0;
    (void)getToken(refMode);
    return 1;
  }
  CString name;
  getCurrentToken(syntax().entitySubstTable(), name);
  Boolean defaulted;
  entity = lookupEntity(isParameter, name, 1, defaulted);
  if (entity.isNull()) {
    if (haveApplicableDtd())
      message(isParameter
              ? Messages::parameterEntityUndefined
              : Messages::entityUndefined,
              StringMessageArg(name));
    else
      message(Messages::entityApplicableDtd);
  }
  else if (defaulted && options().warnDefaultEntityReference)
    message(Messages::defaultEntityReference, StringMessageArg(name));

  // Read the token that follows the name in refMode; its value is not needed.
  (void)getToken(refMode);

  if (!entity.isNull())
    // The origin records the start of the reference and its length
    // (distance from the start to the current input position).
    origin = new (internalAllocator())
               EntityOrigin(entity, startLocation,
                            currentInput()->currentLocation().index()
                            - startLocation.index());
  else
    origin = 0;
  return 1;
}

// Read tokens in `mode` until the tokenCom token.
//
// Returns 1 when tokenCom is reached, or 0 (after issuing
// Messages::commentEntityEnd) if an entity end is seen first.
// An unrecognized token that reportNonSgmlCharacter() does not report is
// reported with Messages::sdCommentSignificant.
Boolean Parser::parseComment(Mode mode)
{
  for (;;) {
    Token tok = getToken(mode);
    if (tok == tokenCom)
      return 1;
    if (tok == tokenEe) {
      message(Messages::commentEntityEnd);
      return 0;
    }
    if (tok == tokenUnrecognized && !reportNonSgmlCharacter())
      message(Messages::sdCommentSignificant,
              StringMessageArg(currentToken()));
  }
}

// Extend the current token for as long as the characters read are name
// characters, then end the token at the resulting length.
// If that length exceeds maxLength, tooLongMessage is issued with
// maxLength as its argument; the token is still extended in full.
void Parser::extendNameToken(size_t maxLength, Messages::Type1 tooLongMessage)
{
  InputSource *input = currentInput();
  size_t tokenLength = input->currentTokenLength();
  for (;;) {
    if (!syntax().isNameCharacter(input->tokenChar(inputContext())))
      break;
    ++tokenLength;
  }
  if (tokenLength > maxLength)
    message(tooLongMessage, NumberMessageArg(maxLength));
  input->endToken(tokenLength);
}


// Extend the current token for as long as the characters read are digits,
// then end the token at the resulting length.
// If that length exceeds maxLength, tooLongMessage is issued with
// maxLength as its argument; the token is still extended in full.
void Parser::extendNumber(size_t maxLength, Messages::Type1 tooLongMessage)
{
  InputSource *input = currentInput();
  size_t tokenLength = input->currentTokenLength();
  for (;;) {
    if (!syntax().isDigit(input->tokenChar(inputContext())))
      break;
    ++tokenLength;
  }
  if (tokenLength > maxLength)
    message(tooLongMessage, NumberMessageArg(maxLength));
  input->endToken(tokenLength);
}

// Fetch a character with getChar() and, if it is not an SGML character
// according to the current syntax, report it with
// Messages::nonSgmlCharacter.
// Returns 1 if the character was reported, 0 otherwise.
Boolean Parser::reportNonSgmlCharacter()
{
  Char ch = getChar();
  if (syntax().isSgmlChar(ch))
    return 0;
  message(Messages::nonSgmlCharacter, NumberMessageArg(ch));
  return 1;
}

void Parser::extendS()
{
  InputSource *in = currentInput();
  size_t length = in->currentTokenLength();
  while (syntax().isS(in->tokenChar(inputContext())))
    length++;
  in->endToken(length);
}
