// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

#include "Parser.H"
#include "macros.H"
#include "SdFormalError.H"
#include "MessageBuilder.H"
#include "Fragments.H"
#include "MessageArg.H"
#include "CharsetRegistry.H"
#include "ISetIter.H"
#include "token.H"
#include "TokenMessageArg.H"

// Information about the SGML declaration being built.

struct SdBuilder {
  SdBuilder();
  // Queue a formal public identifier error.  parseSgmlDecl() sends the
  // queued errors only when the finished declaration has FORMAL enabled.
  void addFormalError(const Location &, Messages::Type1, const CString &);
  // The SGML declaration under construction; created by
  // sdParseDocumentCharset().
  ResourcePointer<Sd> sd;
  // The concrete syntax under construction; created by
  // sdParseDocumentCharset().
  ResourcePointer<Syntax> syntax;
  // Declaration of the syntax reference character set, as parsed by
  // sdParseSyntaxCharset().
  CharsetDecl syntaxCharsetDecl;
  // The syntax reference character set itself.
  CharsetInfo syntaxCharset;
  // Cleared by sdParseSyntax() when the syntax could not be set up;
  // parseSgmlDecl() fails if this is false at the end of the declaration.
  Boolean valid;
  // Formal errors queued by addFormalError().
  IList<SdFormalError> formalErrorList;
};

// Applies the character switches given by the SWITCHES parameter of a
// public concrete syntax.  sdParseSyntax() stores the switches as
// consecutive pairs in the vector passed to the constructor.
class CharSwitcher {
public:
  // A switcher with no switches.
  CharSwitcher();
  // The vector is referenced, not copied (only a pointer is stored).
  CharSwitcher(const VectorBase<UnivChar> *switches);
  // Map a reference universal character through the switches.
  // NOTE(review): implementation not visible here; presumably also
  // records that the matching switch was used -- confirm.
  UnivChar subst(UnivChar c);
  // Number of switches.
  size_t nSwitches() const;
  // Whether switch i was used; setStandardSyntax() reports unused
  // switches with Messages::switchNotMarkup.
  Boolean switchUsed(size_t i) const;
  // The character that switch i replaces.
  UnivChar switchFrom(size_t i) const;
private:
  Vector<PackedBoolean> switchUsed_;
  const VectorBase<UnivChar> *switches_;
};

// A message argument that carries a set of character numbers, used in this
// file to report missing, duplicate or invalid characters.
class CharsetMessageArg : public MessageArg {
public:
  // The set is copied into the argument.
  CharsetMessageArg(const ISet<WideChar> &set);
  MessageArg *copy() const;
  // Format the set into the message being built.
  void append(MessageBuilder &) const;
private:
  ISet<WideChar> set_;
};

// One parameter of the SGML declaration, as returned by parseSdParam().
// `type' says which kind of parameter was read and therefore which of the
// other members is meaningful.
struct SdParam {
  typedef unsigned char Type;
  // Parameter kinds.  A reserved name is encoded as
  // reservedName + Sd::ReservedName, so reservedName must stay last.
  enum {
    invalid,
    minimumLiteral,
    mdc,
    number,
    capacityName,
    name,
    paramLiteral,
    generalDelimiterName,
    referenceReservedName,
    quantityName,
    reservedName		// Sd::ReservedName is added to this
  };
  Type type;
  CString token;
  // Text of a minimum literal (used when type is minimumLiteral).
  Text literalText;
  String<SyntaxChar> paramLiteralText;
  // Value associated with number and the *Name parameter kinds.
  union {
    Number n;
    Sd::Capacity capacityIndex;
    Syntax::Quantity quantityIndex;
    Syntax::ReservedName reservedNameIndex;
    Syntax::DelimGeneral delimGeneralIndex;
  };
};

// The set of parameter types acceptable at one point in the SGML
// declaration; passed to parseSdParam().  At most maxAllow (6) types can
// be listed; unused positions default to SdParam::invalid.
class AllowedSdParams {
public:
  AllowedSdParams(SdParam::Type,
		  SdParam::Type = SdParam::invalid,
		  SdParam::Type = SdParam::invalid,
		  SdParam::Type = SdParam::invalid,
		  SdParam::Type = SdParam::invalid,
		  SdParam::Type = SdParam::invalid);
  // Whether the given parameter type is allowed.
  Boolean param(SdParam::Type) const;
  // The i-th entry of the allowed list.
  SdParam::Type get(int i) const;
private:
  enum { maxAllow = 6 };
  SdParam::Type allow_[maxAllow];
};

// A message argument describing a set of allowed SGML declaration
// parameters.  It keeps a copy of the allowed set and a counted pointer
// to the Sd.
class AllowedSdParamsMessageArg : public MessageArg {
public:
  AllowedSdParamsMessageArg(const AllowedSdParams &allow,
			    const ConstResourcePointer<Sd> &sd);
  MessageArg *copy() const;
  // Format the allowed parameters into the message being built.
  void append(MessageBuilder &) const;
private:
  AllowedSdParams allow_;
  ConstResourcePointer<Sd> sd_;
};

// Describes how one of the built-in (public) concrete syntaxes differs
// from the others: its added function characters and whether it has the
// reference short reference delimiters.  Consumed by setStandardSyntax().
struct StandardSyntaxSpec {
  // One added function character.
  struct AddedFunction {
    const char *name;			// function name, e.g. "TAB"
    Syntax::FunctionClass functionClass;
    UnivChar univChar;			// universal character number
  };
  // Array of nAddedFunction added functions.
  const AddedFunction *addedFunction;
  size_t nAddedFunction;
  // Non-zero if the reference short reference delimiters are to be added.
  Boolean shortref;
};

// Added function characters shared by coreSyntax and refSyntax:
// only TAB (universal character 9) as a SEPCHAR.
static StandardSyntaxSpec::AddedFunction coreFunctions[] = {
  { "TAB", Syntax::cSEPCHAR, 9 },
};

// Added function characters for the multicode syntaxes.  The third field
// is a universal character number.
static StandardSyntaxSpec::AddedFunction multicodeFunctions[] = {
  { "TAB", Syntax::cSEPCHAR, 9 },
  // The syntax reference character set in the Multicode Basic Concrete
  // Syntax in ISO 8879 is incorrect.  The function LS0 is assigned to
  // the character whose meaning is "LS1 in ISO 2022", and the function
  // LS1 is assigned to the character whose meaning is "LS0 in ISO 2022".
  // I'm not correcting this because Exoterica's test suite assumes the
  // incorrect version.
  // LS1 in ISO 2022 is universal character number 14 (= SO)
  // LS0 in ISO 2022 is universal character number 15 (= SI)
  { "ESC", Syntax::cMSOCHAR, 27 },
  { "LS0", Syntax::cMSICHAR, 14 },
  { "LS1", Syntax::cMSOCHAR, 15 },
  { "SS2", Syntax::cMSSCHAR, 142 },
  { "SS3", Syntax::cMSSCHAR, 143 }
};

// This converts a number in a syntax reference character set into
// a universal character number.

// Numbers 14 and 15 are exchanged; every other number maps to itself.
static UnivChar syntaxRefCharToUnivChar(UnivChar c)
{
  switch (c) {
  case 14:
    return 15;
  case 15:
    return 14;
  default:
    return c;
  }
}

// Inverse of syntaxRefCharToUnivChar().  That mapping only exchanges
// two numbers, so it is its own inverse and can simply be reused.
inline UnivChar univCharToSyntaxRefChar(UnivChar c)
{
  const UnivChar refChar = syntaxRefCharToUnivChar(c);
  return refChar;
}

// The four built-in concrete syntaxes.  Each initializer gives the added
// function table, its size, and the shortref flag (1 = add the reference
// short reference delimiters).

// Core concrete syntax: no short references.
static StandardSyntaxSpec coreSyntax = {
  coreFunctions, SIZEOF(coreFunctions), 0
};

// Reference concrete syntax: core plus short references.
static StandardSyntaxSpec refSyntax = {
  coreFunctions, SIZEOF(coreFunctions), 1
};

// Multicode core concrete syntax: multicode functions, no short references.
static StandardSyntaxSpec multicodeCoreSyntax = {
  multicodeFunctions, SIZEOF(multicodeFunctions), 0
};

// Multicode basic concrete syntax: multicode functions plus short references.
static StandardSyntaxSpec multicodeBasicSyntax = {
  multicodeFunctions, SIZEOF(multicodeFunctions), 1
};

void Parser::doInit()
{
  const CharsetInfo &initCharset(sd().docCharset());
  ISet<WideChar> missing;
  findMissingMinimum(initCharset, missing);
  if (!missing.isEmpty()) {
    message(Messages::sdMissingCharacters, CharsetMessageArg(missing));
    giveUp();
    return;
  }
  if (scanForSgmlDecl(initCharset)) {
    Location startLocation(currentLocation());
    // startLocation points to start of <!SGML
    startLocation += currentInput()->currentTokenLength() - 6;
    if (esisPlus() && currentInput()->currentTokenLength() > 6) // <!SGML
      eventHandler().ignoredChars(
        new (eventAllocator()) IgnoredCharsEvent(currentInput()->currentTokenStart(),
						 currentInput()->currentTokenLength() - 6,
						 currentLocation(),
						 1));
    Syntax *syntaxp = new Syntax(sd());
    CharSwitcher switcher;
    if (!setStandardSyntax(*syntaxp, refSyntax, sd().docCharset(),
			   switcher)) {
      giveUp();
      return;
    }
    syntaxp->implySgmlChar(sd().docCharset());
    setSyntax(syntaxp);
    compileSdModes();
    if (!parseSgmlDecl()) {
      giveUp();
      return;
    }
    // queue an SGML declaration event
    eventHandler().sgmlDecl(new (eventAllocator())
			    SgmlDeclEvent(sdPointer(),
					  syntaxPointer(),
					  instanceSyntaxPointer(),
					  startLocation,
					  currentInput()->nextIndex()));
				  
  }
  else {
    currentInput()->ungetToken();
    if (!implySgmlDecl()) {
      giveUp();
      return;
    }
    // queue an SGML declaration event
    eventHandler().sgmlDecl(new (eventAllocator())
			    SgmlDeclEvent(sdPointer(),
					  syntaxPointer()));
  }
						      
  // Now we have sd and syntax set up, prepare to parse the prolog.
  compilePrologModes();
  setDoFunction(&Parser::doProlog);
}

// Set up the concrete syntax for a document with no explicit SGML
// declaration.  The shortref and multicode options select one of the four
// built-in syntaxes.  Returns 0 if the syntax cannot be set up for the
// document character set, 1 otherwise.
Boolean Parser::implySgmlDecl()
{
  // Counted pointer so the syntax is released on the failure return
  // below instead of being leaked.
  ResourcePointer<Syntax> syntaxp(new Syntax(sd()));
  const StandardSyntaxSpec *spec;
  if (options().shortref) {
    if (options().multicode)
      spec = &multicodeBasicSyntax;
    else
      spec = &refSyntax;
  }
  else {
    if (options().multicode)
      spec = &multicodeCoreSyntax;
    else
      spec = &coreSyntax;
  }
  CharSwitcher switcher;
  if (!setStandardSyntax(*syntaxp, *spec, sd().docCharset(), switcher))
    return 0;
  syntaxp->implySgmlChar(sd().docCharset());
  setSyntax(syntaxp.pointer());
  return 1;
}

// Fill in `syn' with one of the built-in concrete syntaxes.
//   syn        syntax to be set up
//   spec       added functions and shortref flag of the chosen syntax
//   docCharset document character set that the reference universal
//              character numbers are translated into
//   switcher   character switches applied to each reference character
// Characters that the document character set lacks are collected and
// reported in one missingSignificant message.  Switches that were never
// used are reported with switchNotMarkup.
// Returns 1 if the syntax was set up without error, 0 otherwise.
Boolean Parser::setStandardSyntax(Syntax &syn,
				  const StandardSyntaxSpec &spec,
				  const CharsetInfo &docCharset,
				  CharSwitcher &switcher)
{
  Boolean valid = 1;
  ISet<WideChar> missing;
  static const Char shunchar[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    127, 255
    };

  size_t i;
  for (i = 0; i < SIZEOF(shunchar); i++)
    syn.addShunchar(shunchar[i]);
  syn.setShuncharControls();
  // RE, RS and SPACE, given as universal character numbers.
  static Syntax::StandardFunction standardFunctions[3] = {
    Syntax::fRE, Syntax::fRS, Syntax::fSPACE
  };
  static UnivChar functionChars[3] = { 13, 10, 32 };
  for (i = 0; i < 3; i++) {
    UnivChar univChar = switcher.subst(functionChars[i]);
    Char docChar;
    if (univToDescCheck(docCharset, univChar, docChar)) {
      if (checkNotFunction(syn, docChar))
	syn.setStandardFunction(standardFunctions[i], docChar);
      else
	valid = 0;
    }
    else
      missing += univChar;
  }
  for (i = 0; i < spec.nAddedFunction; i++) {
    Char docChar;
    UnivChar univChar = switcher.subst(spec.addedFunction[i].univChar);
    if (univToDescCheck(docCharset, univChar, docChar)) {
      if (checkNotFunction(syn, docChar))
	syn.addFunctionChar(docCharset.execToDesc(spec.addedFunction[i].name),
			    spec.addedFunction[i].functionClass,
			    docChar);
      else
	// A clash with an existing function character makes the syntax
	// invalid, just as for the standard functions above.
	valid = 0;
    }
    else
      missing += univChar;
  }

  static UnivChar nameChars[2] = { 45, 46 }; // '-' '.'
  for (i = 0; i < 2; i++) {
    UnivChar univChar = switcher.subst(nameChars[i]);
    Char docChar;
    if (univToDescCheck(docCharset, univChar, docChar)) {
      if (checkNmchar(syn, univChar, docChar))
	syn.addNmcharPair(docChar, docChar);
      else
	// A rejected name character likewise invalidates the syntax.
	valid = 0;
    }
    else
      missing += univChar;
  }
  syn.setNamecaseGeneral(1);
  syn.setNamecaseEntity(0);
  if (!setRefDelimGeneral(syn, docCharset, switcher, missing))
    valid = 0;
  setRefNames(syn, docCharset);
  syn.enterStandardFunctionNames();
  if (spec.shortref
      && !addRefDelimShortref(syn, docCharset, switcher, missing))
    valid = 0;
  if (!missing.isEmpty()) {
    message(Messages::missingSignificant, CharsetMessageArg(missing));
    valid = 0;
  }
  size_t nSwitches = switcher.nSwitches();
  for (i = 0; i < nSwitches; i++)
    if (!switcher.switchUsed(i))
      // If the switch wasn't used,
      // then the character wasn't a markup character.
      message(Messages::switchNotMarkup,
	      NumberMessageArg(univCharToSyntaxRefChar(switcher.switchFrom(i))));
  return valid;
}

// Give every general delimiter that has not yet been assigned its
// reference value.  Each reference delimiter is one or two universal
// characters; each is passed through `switcher' and translated into the
// document character set.  Characters the document character set lacks
// are added to `missing'.
// Returns 0 if any character was missing or any delimiter was rejected by
// checkGeneralDelim(), 1 otherwise.
Boolean Parser::setRefDelimGeneral(Syntax &syntax,
				   const CharsetInfo &docCharset,
				   CharSwitcher &switcher,
				   ISet<WideChar> &missing)
{
  // Column 3 from Figure 3
  // Indexed by general delimiter number; a second element of 0 means a
  // one-character delimiter.
  static const char delims[][2] = {
    { 38 },
    { 45, 45 },
    { 38, 35 },
    { 93 },
    { 91 },
    { 93 },
    { 91 },
    { 38 },
    { 60, 47 },
    { 41 },
    { 40 },
    { 34 },
    { 39 },
    { 62 },
    { 60, 33 },
    { 45 },
    { 93, 93 },
    { 47 },
    { 63 },
    { 124 },
    { 37 },
    { 62 },
    { 60, 63 },
    { 43 },
    { 59 },
    { 42 },
    { 35 },
    { 44 },
    { 60 },
    { 62 },
    { 61 },
  };
  Boolean valid = 1;
  for (int i = 0; i < Syntax::nDelimGeneral; i++)
    if (syntax.delimGeneral(i).length() == 0) {
      CString delim;
      // j is needed after the loop, so it must be declared outside it.
      size_t j;
      for (j = 0; j < 2 && delims[i][j] != '\0'; j++) {
	Char c;
	UnivChar univChar = switcher.subst(delims[i][j]);
	if (univToDescCheck(docCharset, univChar, c))
	  delim += c;
	else {
	  missing += univChar;
	  valid = 0;
	}
      }
      // Only set the delimiter if every one of its characters was
      // available in the document character set.
      if (delim.length() == j) {
	if (checkGeneralDelim(syntax, delim))
	  syntax.setDelimGeneral(i, delim);
	else
	  valid = 0;
      }
    }
  return valid;
}

// Assign the reference reserved name to every reserved name of `syntax'
// that is still empty.  Names are converted to the document character set
// with docCharset.execToDesc().
void Parser::setRefNames(Syntax &syntax, const CharsetInfo &docCharset)
{
  // Indexed by reserved name number.
  static const char *const referenceNames[] = {
    "ANY",
    "ATTLIST",
    "CDATA",
    "CONREF",
    "CURRENT",
    "DEFAULT",
    "DOCTYPE",
    "ELEMENT",
    "EMPTY",
    "ENDTAG",
    "ENTITIES",
    "ENTITY",
    "FIXED",
    "ID",
    "IDLINK",
    "IDREF",
    "IDREFS",
    "IGNORE",
    "IMPLIED",
    "INCLUDE",
    "INITIAL",
    "LINK",
    "LINKTYPE",
    "MD",
    "MS",
    "NAME",
    "NAMES",
    "NDATA",
    "NMTOKEN",
    "NMTOKENS",
    "NOTATION",
    "NUMBER",
    "NUMBERS",
    "NUTOKEN",
    "NUTOKENS",
    "O",
    "PCDATA",
    "PI",
    "POSTLINK",
    "PUBLIC",
    "RCDATA",
    "RE",
    "REQUIRED",
    "RESTORE",
    "RS",
    "SDATA",
    "SHORTREF",
    "SIMPLE",
    "SPACE",
    "STARTTAG",
    "SUBDOC",
    "SYSTEM",
    "TEMP",
    "USELINK",
    "USEMAP"
    };

  int nameIndex = 0;
  while (nameIndex < Syntax::nNames) {
    const Boolean alreadySet
      = syntax.reservedName(Syntax::ReservedName(nameIndex)).length() != 0;
    if (!alreadySet)
      syntax.setName(nameIndex,
		     docCharset.execToDesc(referenceNames[nameIndex]));
    nameIndex++;
  }
}

// Add the reference short reference delimiters to `syntax'.  Each
// delimiter is up to three universal characters; each is passed through
// `switcher' and translated into the document character set.  Characters
// the document character set lacks are added to `missing', and the
// delimiter containing them is skipped.  When switches are in effect, a
// delimiter that is already present is reported with
// duplicateDelimShortref instead of being added again.
// Always returns 1.
Boolean Parser::addRefDelimShortref(Syntax &syntax,
				    const CharsetInfo &docCharset,
				    CharSwitcher &switcher,
				    ISet<WideChar> &missing)
{
  // Column 2 from Figure 4
  // Unused trailing elements are 0.
  static const char delimShortref[][3] = {
    { 9 },
    { 13 },
    { 10 },
    { 10, 66 },
    { 10, 13 },
    { 10, 66, 13 },
    { 66, 13 },
    { 32 },
    { 66, 66 },
    { 34 },
    { 35 },
    { 37 },
    { 39 },
    { 40 },
    { 41 },
    { 42 },
    { 43 },
    { 44 },
    { 45 },
    { 45, 45 },
    { 58 },
    { 59 },
    { 61 },
    { 64 },
    { 91 },
    { 93 },
    { 94 },
    { 95 },
    { 123 },
    { 124 },
    { 125 },
    { 126 },
  };

  for (size_t i = 0; i < SIZEOF(delimShortref); i++) {
    CString delim;

    // j is needed after the loop, so it must be declared outside it.
    size_t j;
    for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) {
      Char c;
      UnivChar univChar = switcher.subst(delimShortref[i][j]);
      if (univToDescCheck(docCharset, univChar, c))
	delim += c;
      else
	missing += univChar;
    }
    // Skip the delimiter unless all of its characters were available.
    if (delim.length() == j) {
      int tem;
      if (switcher.nSwitches() > 0
	  && syntax.lookupShortref(delim, &tem))
	message(Messages::duplicateDelimShortref,
		StringMessageArg(delim));
      else
	syntax.addDelimShortref(delim);
    }
  }
  return 1;
}

// Determine whether the document starts with an SGML declaration.
// There is no current syntax at this point.

// Scan the start of the input for "<!SGML" (any case for the letters),
// optionally preceded by RS, RE, SPACE or TAB characters.  Returns 1 if it
// is found and not followed by a name character, 0 otherwise.  Characters
// are read into the current token of the current input source.
Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset)
{
  // All four separator characters must exist in the character set.
  Char rs;
  Char re;
  Char space;
  Char tab;
  if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs)
      || !univToDescCheck(initCharset, UnivCharsetDesc::re, re)
      || !univToDescCheck(initCharset, UnivCharsetDesc::space, space)
      || !univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))
    return 0;

  InputSource *in = currentInput();
  Xchar c = in->get(inputContext());
  // Skip leading separators.
  for (;;) {
    if (c != rs && c != space && c != re && c != tab)
      break;
    c = in->tokenChar(inputContext());
  }
  if (c != initCharset.execToDesc('<'))
    return 0;
  if (in->tokenChar(inputContext()) != initCharset.execToDesc('!'))
    return 0;

  // The four letters of SGML, each accepted in either case.
  static const char upperCase[] = { 'S', 'G', 'M', 'L' };
  static const char lowerCase[] = { 's', 'g', 'm', 'l' };
  for (size_t k = 0; k < 4; k++) {
    c = in->tokenChar(inputContext());
    if (c != initCharset.execToDesc(upperCase[k])
	&& c != initCharset.execToDesc(lowerCase[k]))
      return 0;
  }

  c = in->tokenChar(inputContext());
  // End of input directly after SGML still counts as a declaration.
  if (c == Xchar(-1))
    return 1;
  // The character after SGML is not part of the token.
  in->endToken(in->currentTokenLength() - 1);
  // Don't recognize this if SGML is followed by a name character.
  if (c == initCharset.execToDesc('-') || c == initCharset.execToDesc('.'))
    return 0;
  UnivChar univ;
  if (!initCharset.descToUniv(c, univ))
    return 1;
  const Boolean isLowerLetter
    = UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26;
  const Boolean isUpperLetter
    = UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26;
  const Boolean isDigit
    = UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10;
  return (isLowerLetter || isUpperLetter || isDigit) ? 0 : 1;
}
	    
void Parser::findMissingMinimum(const CharsetInfo &charset,
				ISet<WideChar> &missing)
{
  Char to;
  int i;
  for (i = 0; i < 26; i++) {
    if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))
      missing += UnivCharsetDesc::A + i;
    if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))
      missing += UnivCharsetDesc::a + i;
  }
  for (i = 0; i < 10; i++) {
    Char to;
    if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))
      missing += UnivCharsetDesc::zero + i;
  }
  static const UnivChar special[] = {
    39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
    };

  for (i = 0; i < SIZEOF(special); i++)
    if (!univToDescCheck(charset, special[i], to))
      missing += special[i];
}


// Parse the body of the SGML declaration, i.e. everything after <!SGML.
// On success the parser's Sd and syntax (or prolog and instance syntaxes
// when SCOPE is INSTANCE) are replaced by the declared ones and 1 is
// returned; on failure 0 is returned.
Boolean Parser::parseSgmlDecl()
{
  SdParam param;
  SdBuilder builder;

  // The declaration starts with the minimum literal naming the standard.
  if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), param))
    return 0;
  CString version(sd().execToDoc("ISO 8879:1986"));
  if (param.literalText.string() != version)
    message(Messages::standardVersion,
	    StringMessageArg(param.literalText.string()));

  // The sections of the declaration, in the order they must appear.
  typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
  static SdParser sections[] = {
    &Parser::sdParseDocumentCharset,
    &Parser::sdParseCapacity,
    &Parser::sdParseScope,
    &Parser::sdParseSyntax,
    &Parser::sdParseFeatures,
    &Parser::sdParseAppinfo,
  };
  size_t section = 0;
  while (section < SIZEOF(sections)) {
    if (!(this->*(sections[section]))(builder, param))
      return 0;
    section++;
  }
  if (!parseSdParam(AllowedSdParams(SdParam::mdc), param)
      || !builder.valid)
    return 0;

  // Queued formal errors are only reported when FORMAL is in effect.
  if (builder.sd->formal()) {
    ParserState *state = this; // work around lcc 3.0 bug
    for (; !builder.formalErrorList.empty();) {
      SdFormalError *formalError = builder.formalErrorList.get();
      formalError->send(*state);
      delete formalError;
    }
  }

  setSd(builder.sd.pointer());
  if (!builder.sd->scopeInstance())
    setSyntax(builder.syntax.pointer());
  else {
    // SCOPE INSTANCE: the prolog uses the reference concrete syntax and
    // the declared syntax applies to the instance only.
    Syntax *prologSyntax = new Syntax(sd());
    CharSwitcher noSwitches;
    setStandardSyntax(*prologSyntax, refSyntax, sd().docCharset(),
		      noSwitches);
    prologSyntax->setSgmlChar(*builder.syntax->charSet(Syntax::sgmlChar));
    ISet<WideChar> badSgmlChars;
    prologSyntax->checkSgmlChar(builder.sd->docCharset(),
				builder.syntax.pointer(),
				badSgmlChars);
    builder.syntax->checkSgmlChar(builder.sd->docCharset(),
				  prologSyntax,
				  badSgmlChars);
    if (!badSgmlChars.isEmpty())
      message(Messages::invalidSgmlChar, CharsetMessageArg(badSgmlChars));
    setSyntaxes(prologSyntax, builder.syntax.pointer());
  }
  if (syntax().multicode())
    currentInput()->setMarkupScanTable(syntax().markupScanTable());
  return 1;
}

// Parse the document character set section (CHARSET BASESET ...).  On
// success, creates sdBuilder.sd from the described character set and
// sdBuilder.syntax from that Sd, and returns 1.  Returns 0 on a parse
// error or if the character set lacks minimum data characters.
Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm)
{
  // The keywords CHARSET and BASESET, in that order.
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),
		    parm)
      || !parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
		       parm))
    return 0;

  CharsetDecl charsetDecl;
  UnivCharsetDesc charsetDesc;
  if (!sdParseCharset(sdBuilder, parm, 1, charsetDecl, charsetDesc))
    return 0;

  ISet<WideChar> absent;
  findMissingMinimum(charsetDesc, absent);
  if (!absent.isEmpty()) {
    message(Messages::missingMinimumChars,
	    CharsetMessageArg(absent));
    return 0;
  }

  // The used characters of the declaration become the SGML characters of
  // the new syntax.
  ISet<Char> usedChars;
  charsetDecl.usedSet(usedChars);
  sdBuilder.sd = new Sd(charsetDesc);
  sdBuilder.sd->setDocCharsetDecl(charsetDecl);
  sdBuilder.syntax = new Syntax(*sdBuilder.sd);
  sdBuilder.syntax->setSgmlChar(usedChars);
  return 1;
}

// Parse a character set description: one or more
//   <base set literal> DESCSET (<number> <count> <number|literal|UNUSED>)+
// groups.  On entry BASESET has already been read.
//   isDocument  non-zero for the document character set, zero for the
//               syntax reference character set; this selects the keyword
//               that may follow the description (CAPACITY or FUNCTION).
//   decl        receives the declaration as written
//   desc        receives the mapping to universal character numbers for
//               ranges whose base set was found in the registry
// Returns 0 on a parse error, 1 otherwise.
Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,
			       SdParam &parm,
			       Boolean isDocument,
			       CharsetDecl &decl,
			       UnivCharsetDesc &desc)
{
  decl.clear();
  ISet<WideChar> multiplyDeclared;
  // This is for checking whether the syntax reference character set
  // is ISO 646 when SCOPE is INSTANCE.
  Boolean maybeISO646 = 1;
  // One iteration per BASESET section.
  do {
    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
      return 0;
    UnivCharsetDesc baseDesc;
    PublicId id;
    Boolean found;
    PublicId::TextClass textClass;
    PublicId::FormalError err;
    // Formal errors are only queued here; see SdBuilder::addFormalError.
    if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
      sdBuilder.addFormalError(currentLocation(),
			       publicIdFormalErrorMessage(err),
			       id.string());
    else if (id.getTextClass(textClass)
	     && textClass != PublicId::CHARSET)
      sdBuilder.addFormalError(currentLocation(),
			       Messages::basesetTextClass,
			       id.string());
    found = CharsetRegistry::findCharset(id, sd().docCharset(), baseDesc);
    if (!found && options().warnSgmlDecl)
      message(Messages::unknownBaseset, StringMessageArg(id.string()));
    if (!found)
      maybeISO646 = 0;
    decl.addSection(id);
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET),
		      parm))
      return 0;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;
    // One iteration per described range; parm.n holds the first
    // character number of the range on entry to each iteration.
    do {
      WideChar min = parm.n;
      if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
	return 0;
      Number count = parm.n;
      // count clipped so that the range does not exceed Char(-1)
      // (document character set only).
      Number adjCount;
      if (options().warnSgmlDecl && count == 0)
	message(Messages::zeroNumberOfCharacters);
      decl.rangeDeclared(min, count, multiplyDeclared);
      if (isDocument
	  && count > 0
	  && (min > Char(-1) || count - 1 > Char(-1) - min)) {
	if (options().warnSgmlDecl)
	  message(Messages::documentCharMax, NumberMessageArg(Char(-1)));
	adjCount = min > Char(-1) ? 0 : 1 + (Char(-1) - min);
	maybeISO646 = 0;
      }
      else
	adjCount = count;
      // Third element of the description: a base set character number,
      // a minimum literal, or UNUSED.
      if (!parseSdParam(AllowedSdParams(SdParam::number,
					SdParam::minimumLiteral,
					SdParam::reservedName + Sd::rUNUSED),
			parm))
	return 0;
      switch (parm.type) {
      case SdParam::number:
	decl.addRange(min, count, parm.n);
	// The universal mapping can only be built for a known base set.
	if (found && adjCount > 0) {
	  ISet<WideChar> baseMissing;
	  desc.addBaseRange(baseDesc, min, min + (adjCount - 1), parm.n,
			    baseMissing);
	  if (!baseMissing.isEmpty() && options().warnSgmlDecl)
	    message(Messages::basesetCharsMissing,
		    CharsetMessageArg(baseMissing));
	}
	break;
      case SdParam::reservedName + Sd::rUNUSED:
	decl.addRange(min, count);
	break;
      case SdParam::minimumLiteral:
	UnivChar c;
	if (CharsetRegistry::findNamedChar(parm.literalText.string(),
					   sd().docCharset(),
					   c)) {
	  // Don't blow up if somebody says N 9999999 "foo"
	  // FIXME report an error here
	  if (adjCount < 256) {
	    // Every character of the range maps to the same named character.
	    for (size_t i = 0; i < adjCount; i++)
	      desc.addRange(min + i, min + i, c);
	  }
	}
	maybeISO646 = 0;
        decl.addRange(min, count, parm.literalText.string());
        break;
      default:
        CANNOT_HAPPEN();			 
      }
      // What may follow: another range (number), another BASESET section,
      // or the keyword that starts the next part of the declaration.
      SdParam::Type follow = (isDocument
			      ? SdParam::reservedName + Sd::rCAPACITY
			      : SdParam::reservedName + Sd::rFUNCTION);
      if (!parseSdParam(AllowedSdParams(SdParam::number,
					SdParam::reservedName + Sd::rBASESET,
					follow),
			parm))
	return 0;
	
    } while (parm.type == SdParam::number);
  } while (parm.type == SdParam::reservedName + Sd::rBASESET);
  if (!multiplyDeclared.isEmpty())
    message(Messages::duplicateCharNumbers,
	    CharsetMessageArg(multiplyDeclared));
  // Warn about gaps between the declared ranges.
  ISet<WideChar> declaredSet;
  decl.declaredSet(declaredSet);
  ISetIter<WideChar> iter(declaredSet);
  WideChar min, max, lastMax;
  if (iter.next(min, max)) {
    ISet<WideChar> holes;
    lastMax = max;
    while (iter.next(min, max)) {
      if (min - lastMax > 1)
	holes.addRange(lastMax + 1, min - 1);
      lastMax = max;
    }
    if (!holes.isEmpty() && options().warnSgmlDecl)
      message(Messages::codeSetHoles, CharsetMessageArg(holes));
  }
  if (!isDocument && sdBuilder.sd->scopeInstance()) {
    // If scope is INSTANCE, syntax reference character set
    // must be same as reference.
    // The check is that the first range of desc maps 0..127 to
    // universal 0..127 and that nothing above cleared maybeISO646.
    UnivCharsetDescIter iter(desc);
    WideChar descMin, descMax;
    UnivChar univMin;
    if (!iter.next(descMin, descMax, univMin)
	|| descMin != 0
	|| descMax != 127
	|| univMin != 0
	|| !maybeISO646)
      message(Messages::scopeInstanceSyntaxCharset);
  }
  return 1;
}

// Parse the capacity set section, up to and including the SCOPE keyword
// that follows it.  The section is either
//   PUBLIC <minimum literal>            (a public capacity set), or
//   SGMLREF (<capacity name> <number>)+ (explicit capacities).
// Explicit capacities are stored in sdBuilder.sd; a capacity given more
// than once keeps its first value.  Returns 0 on a parse error, else 1.
Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC,
				    SdParam::reservedName + Sd::rSGMLREF),
		    parm))
    return 0;
  if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
      return 0;
    PublicId id;
    PublicId::TextClass textClass;
    PublicId::FormalError err;
    if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
      sdBuilder.addFormalError(currentLocation(),
			       publicIdFormalErrorMessage(err),
			       id.string());
    else if (id.getTextClass(textClass)
	     && textClass != PublicId::CAPACITY)
      sdBuilder.addFormalError(currentLocation(),
			       Messages::capacityTextClass,
			       id.string());
    // Only the reference capacity set is known (either spelling).
    const CString &str(id.string());
    if (str != sd().execToDoc("ISO 8879-1986//CAPACITY Reference//EN")
	&& str != sd().execToDoc("ISO 8879:1986//CAPACITY Reference//EN"))
      message(Messages::unknownCapacitySet, StringMessageArg(str));
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
		      parm))
      return 0;
  }
  else {
    PackedBoolean capacitySpecified[Sd::nCapacity];
    // Declared here because it is used by both loops in this block.
    int i;
    for (i = 0; i < Sd::nCapacity; i++)
      capacitySpecified[i] = 0;
    if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm))
      return 0;
    do {
      Sd::Capacity capacityIndex = parm.capacityIndex;
      if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
	return 0;

      if (!capacitySpecified[capacityIndex]) {
	sdBuilder.sd->setCapacity(capacityIndex, parm.n);
	capacitySpecified[capacityIndex] = 1;
      }
      else if (options().warnSgmlDecl)
	// Name the capacity that was repeated, not the loop index left
	// over from the initialization loop above.
	message(Messages::duplicateCapacity,
		StringMessageArg(sd().capacityName(capacityIndex)));
      if (!parseSdParam(AllowedSdParams(SdParam::capacityName,
					SdParam::reservedName + Sd::rSCOPE),
			parm))
	return 0;
    } while (parm.type == SdParam::capacityName);
    // Capacity 0 is the total capacity; no other capacity may exceed it.
    Number totalcap = sdBuilder.sd->capacity(0);
    for (i = 1; i < Sd::nCapacity; i++)
      if (sdBuilder.sd->capacity(i) > totalcap)
	message(Messages::capacityExceedsTotalcap,
		StringMessageArg(sd().capacityName(i)));
  }
  return 1;
}

// Parse the value of the SCOPE parameter (INSTANCE or DOCUMENT) and record
// INSTANCE in sdBuilder.sd.  Returns 0 on a parse error, 1 otherwise.
Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm)
{
  const Boolean parsed
    = parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINSTANCE,
				   SdParam::reservedName + Sd::rDOCUMENT),
		   parm);
  if (!parsed)
    return 0;
  // DOCUMENT needs no action.
  if (parm.type == SdParam::reservedName + Sd::rINSTANCE)
    sdBuilder.sd->setScopeInstance();
  return 1;
}

// Parse the concrete syntax section.  After the SYNTAX keyword it is
// either
//   PUBLIC <minimum literal> [SWITCHES (<number> <number>)+]
// which selects one of the built-in syntaxes, or an explicit description
// starting with SHUNCHAR, which is handled by the per-section parsers.
// Failure to set up a public syntax clears sdBuilder.valid but parsing
// continues.  Returns 0 on a parse error, 1 otherwise.
Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX),
		    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR,
				    SdParam::reservedName + Sd::rPUBLIC),
		    parm))
    return 0;

  if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
      return 0;
    PublicId id;
    PublicId::FormalError err;
    PublicId::TextClass textClass;
    // Formal errors are only queued here; see SdBuilder::addFormalError.
    if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
      sdBuilder.addFormalError(currentLocation(),
			       publicIdFormalErrorMessage(err),
			       id.string());
    else if (id.getTextClass(textClass)
	     && textClass != PublicId::SYNTAX)
      sdBuilder.addFormalError(currentLocation(),
			       Messages::syntaxTextClass,
			       id.string());
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES,
				      SdParam::reservedName + Sd::rSWITCHES),
		      parm))
      return 0;
    // Switches are stored as consecutive (from, to) pairs of universal
    // character numbers.
    GrowableVector<UnivChar> charSwitches;
    if (parm.type == SdParam::reservedName + Sd::rSWITCHES) {
      if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
	return 0;
      for (;;) {
	UnivChar c = parm.n;
	Boolean valid = checkSwitch(parm.n);
	if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
	  return 0;
	// always add characters in pairs
	if (checkSwitch(parm.n) && valid) {
	  charSwitches.grow() = syntaxRefCharToUnivChar(c);
	  charSwitches.grow() = syntaxRefCharToUnivChar(parm.n);
	}
	// Either another pair follows or FEATURES ends the switches.
	if (!parseSdParam(AllowedSdParams(SdParam::number,
					  SdParam::reservedName
					  + Sd::rFEATURES),
			  parm))
	  return 0;
	if (parm.type != SdParam::number)
	  break;
      }
    }
    const StandardSyntaxSpec *spec = lookupSyntax(id);
    if (spec) {
      CharSwitcher switcher(&charSwitches);
      if (!setStandardSyntax(*sdBuilder.syntax,
			     *spec,
			     sdBuilder.sd->docCharset(),
			     switcher))
	sdBuilder.valid = 0;
    }
    else {
      sdBuilder.valid = 0;
      message(Messages::unknownPublicSyntax);
    }
  }
  else {
    // Explicit syntax: the sections in the order they must appear.
    typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
    static SdParser parsers[] = {
      &Parser::sdParseShunchar,
      &Parser::sdParseSyntaxCharset,
      &Parser::sdParseFunction,
      &Parser::sdParseNaming,
      &Parser::sdParseDelim,
      &Parser::sdParseNames,
      &Parser::sdParseQuantity
    };
    for (int i = 0; i < SIZEOF(parsers); i++)
      if (!(this->*(parsers[i]))(sdBuilder, parm))
	return 0;
  }
  // With SCOPE INSTANCE the SGML characters are checked in
  // parseSgmlDecl(), against both syntaxes.
  if (!sdBuilder.sd->scopeInstance()) {
    // we know the significant chars now
    ISet<WideChar> invalidSgmlChar;
    sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
				    0,
				    invalidSgmlChar);
    if (!invalidSgmlChar.isEmpty())
      message(Messages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
  }
  checkSyntaxNamelen(*sdBuilder.syntax);
  return 1;
}

// Map a public identifier onto one of the built-in standard syntaxes.
// The identifier must have an ISO owner of "ISO 8879:1986" (or the
// "ISO 8879-1986" spelling), a text class of SYNTAX, and a description
// of "Reference", "Core", "Multicode Basic" or "Multicode Core".
// Returns 0 when the identifier does not name a known syntax.
const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id)
{
  PublicId::OwnerType owner;
  if (!id.getOwnerType(owner))
    return 0;
  if (owner != PublicId::ISO)
    return 0;

  CString text;
  if (!id.getOwner(text))
    return 0;
  if (text != sd().execToDoc("ISO 8879:1986")
      && text != sd().execToDoc("ISO 8879-1986"))
    return 0;

  PublicId::TextClass cls;
  if (!id.getTextClass(cls))
    return 0;
  if (cls != PublicId::SYNTAX)
    return 0;

  // From here on `text` holds the description rather than the owner.
  if (!id.getDescription(text))
    return 0;

  const StandardSyntaxSpec *found = 0;
  if (text == sd().execToDoc("Reference"))
    found = &refSyntax;
  else if (text == sd().execToDoc("Core"))
    found = &coreSyntax;
  else if (text == sd().execToDoc("Multicode Basic"))
    found = &multicodeBasicSyntax;
  else if (text == sd().execToDoc("Multicode Core"))
    found = &multicodeCoreSyntax;
  return found;
}

// Parse the syntax-reference character set description and, when that
// succeeds, install the resulting universal description in the builder.
// Returns 0 if the charset could not be parsed.
Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm)
{
  UnivCharsetDesc syntaxDesc;
  if (sdParseCharset(sdBuilder, parm, 0, sdBuilder.syntaxCharsetDecl,
                     syntaxDesc)) {
    sdBuilder.syntaxCharset.set(syntaxDesc);
    return 1;
  }
  return 0;
}

// Parse the SHUNCHAR parameters: either NONE, or CONTROLS and/or a list
// of character numbers, terminated by the BASESET keyword (which is
// consumed here).  Numbers that do not fit in a Char are skipped.
// Returns 0 on a parameter error.
Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm)
{
  const int noneParam = SdParam::reservedName + Sd::rNONE;
  const int controlsParam = SdParam::reservedName + Sd::rCONTROLS;
  const int basesetParam = SdParam::reservedName + Sd::rBASESET;

  if (!parseSdParam(AllowedSdParams(noneParam, controlsParam,
                                    SdParam::number),
                    parm))
    return 0;

  if (parm.type == noneParam) {
    // NONE must be followed directly by BASESET.
    return parseSdParam(AllowedSdParams(basesetParam), parm) ? 1 : 0;
  }

  if (parm.type == controlsParam)
    sdBuilder.syntax->setShuncharControls();
  else if (parm.n <= Char(-1))
    sdBuilder.syntax->addShunchar(Char(parm.n));

  // Remaining entries are plain numbers until BASESET shows up.
  while (parseSdParam(AllowedSdParams(basesetParam, SdParam::number), parm)) {
    if (parm.type != SdParam::number)
      return 1;
    if (parm.n <= Char(-1))
      sdBuilder.syntax->addShunchar(Char(parm.n));
  }
  return 0;
}


// Parse the function-character part of a concrete syntax.
// First the three mandatory entries RE, RS and SPACE (keyword followed
// by a character number), then any number of added functions of the
// form "name class number".  The list ends when LCNMSTRT is read in the
// class position; the name preceding it is expected to be NAMING.
// Returns 0 on a parameter error.
Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm)
{
  static Sd::ReservedName requiredNames[3] = {
    Sd::rRE, Sd::rRS, Sd::rSPACE
  };
  for (int fn = 0; fn < 3; fn++) {
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName
                                      + requiredNames[fn]),
                      parm))
      return 0;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;
    Char docChar;
    if (!translateSyntax(sdBuilder, parm.n, docChar))
      continue;
    if (checkNotFunction(*sdBuilder.syntax, docChar))
      sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(fn),
                                            docChar);
    else
      sdBuilder.valid = 0;
  }

  Boolean sawMsichar = 0;
  Boolean sawMsochar = 0;
  for (;;) {
    if (!parseSdParam(AllowedSdParams(SdParam::name), parm))
      return 0;
    CString givenName;
    parm.token.moveTo(givenName);
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
                                      SdParam::reservedName + Sd::rMSICHAR,
                                      SdParam::reservedName + Sd::rMSOCHAR,
                                      SdParam::reservedName + Sd::rMSSCHAR,
                                      SdParam::reservedName + Sd::rSEPCHAR,
                                      SdParam::reservedName + Sd::rLCNMSTRT),
                      parm))
      return 0;

    const int keyword = parm.type;
    if (keyword == SdParam::reservedName + Sd::rLCNMSTRT) {
      // End of the function list: the name just read should be NAMING.
      if (givenName != sd().reservedName(Sd::rNAMING))
        message(Messages::namingBeforeLcnmstrt,
                StringMessageArg(givenName));
      break;
    }

    Syntax::FunctionClass charClass;
    if (keyword == SdParam::reservedName + Sd::rFUNCHAR)
      charClass = Syntax::cFUNCHAR;
    else if (keyword == SdParam::reservedName + Sd::rMSICHAR) {
      sawMsichar = 1;
      charClass = Syntax::cMSICHAR;
    }
    else if (keyword == SdParam::reservedName + Sd::rMSOCHAR) {
      sawMsochar = 1;
      charClass = Syntax::cMSOCHAR;
    }
    else if (keyword == SdParam::reservedName + Sd::rMSSCHAR)
      charClass = Syntax::cMSSCHAR;
    else if (keyword == SdParam::reservedName + Sd::rSEPCHAR)
      charClass = Syntax::cSEPCHAR;
    else {
      CANNOT_HAPPEN();
    }

    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;

    Char docChar;
    CString docName;
    if (!translateSyntax(sdBuilder, parm.n, docChar))
      continue;
    if (!checkNotFunction(*sdBuilder.syntax, docChar))
      continue;
    if (!translateName(sdBuilder, givenName, docName))
      continue;

    Char existing;
    if (sdBuilder.syntax->lookupFunctionChar(docName, &existing))
      message(Messages::duplicateFunctionName, StringMessageArg(docName));
    else
      sdBuilder.syntax->addFunctionChar(docName, charClass, docChar);
  }

  if (sawMsochar && !sawMsichar)
    message(Messages::msocharRequiresMsichar);
  return 1;
}

// Parse the naming rules of a concrete syntax.
//
// Reads four parameter literals: the first one directly (its keyword is
// not read here; sdParseFunction in this file stops after reading
// LCNMSTRT), then UCNMSTRT, LCNMCHAR and UCNMCHAR each preceded by
// their keyword.  Reports characters that occur both in a name start
// literal and in a name character literal, checks that the lower/upper
// case literals have matching lengths, registers the translated
// character pairs with the syntax, and finally reads
// "NAMECASE GENERAL (NO|YES) ENTITY (NO|YES)".
//
// Returns 0 on a parameter error; semantic problems are reported via
// message() and, for length mismatches, also clear sdBuilder.valid.
Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm)
{
  static Sd::ReservedName names[3] = {
    Sd::rUCNMSTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR
  };
  // Index 0/1: lower/upper case name start characters,
  // index 2/3: lower/upper case name characters.
  String<SyntaxChar> namechars[4];
  // Declared once at function scope: the original relied on a counter
  // declared in a for-init-statement staying visible after the loop,
  // which ISO C++ does not allow.  This form compiles under both the
  // old and the ISO scoping rules.
  int i;
  if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral), parm))
    return 0;
  parm.paramLiteralText.moveTo(namechars[0]);
  for (i = 0; i < 3; i++) {
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + names[i]),
		      parm))
      return 0;
    if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral), parm))
      return 0;
    parm.paramLiteralText.moveTo(namechars[i + 1]);
  }
  // Check that character not both in LCNMCHAR or UCNMCHAR and
  // LCNMSTRT or UCNMSTRT
  for (i = 0; i < 2; i++) {
    for (size_t j = 0; j < namechars[i].length(); j++)
      for (int k = 2; k < 4; k++)
	for (size_t l = 0; l < namechars[k].length(); l++)
	  if (namechars[i][j] == namechars[k][l])
	    message(Messages::nmcharNmStrt,
		    NumberMessageArg(namechars[i][j]));
  }

  // Name start characters are added as lower/upper case pairs, so the
  // two literals must have the same length.
  if (namechars[0].length() != namechars[1].length()) {
    message(Messages::nmstrtLength);
    sdBuilder.valid = 0;
  }
  else {
    for (size_t pos = 0; pos < namechars[0].length(); pos++) {
      Char lc, uc;
      if (translateSyntax(sdBuilder, namechars[0][pos], lc)
	  && checkNmchar(*sdBuilder.syntax, namechars[0][pos], lc)
	  && translateSyntax(sdBuilder, namechars[1][pos], uc)
	  && checkNmchar(*sdBuilder.syntax, namechars[1][pos], uc))
	sdBuilder.syntax->addNmstrtPair(lc, uc);
    }
  }
  // Same pairing rule for the additional name characters.
  if (namechars[2].length() != namechars[3].length()) {
    message(Messages::nmcharLength);
    sdBuilder.valid = 0;
  }
  else {
    for (size_t pos = 0; pos < namechars[2].length(); pos++) {
      Char lc, uc;
      if (translateSyntax(sdBuilder, namechars[2][pos], lc)
	  && checkNmchar(*sdBuilder.syntax, namechars[2][pos], lc)
	  && translateSyntax(sdBuilder, namechars[3][pos], uc)
	  && checkNmchar(*sdBuilder.syntax, namechars[3][pos], uc))
	sdBuilder.syntax->addNmcharPair(lc, uc);
    }
  }
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNAMECASE),
		    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
		    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
				    SdParam::reservedName + Sd::rYES),
		    parm))
    return 0;
  sdBuilder.syntax->setNamecaseGeneral(parm.type
				       == SdParam::reservedName + Sd::rYES);

  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
		    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
				    SdParam::reservedName + Sd::rYES),
		    parm))
    return 0;
  sdBuilder.syntax->setNamecaseEntity(parm.type
				      == SdParam::reservedName + Sd::rYES);
  return 1;
}

// Parse the DELIM section of a concrete syntax:
//   DELIM GENERAL SGMLREF {delimiter-name literal}
//   SHORTREF (SGMLREF|NONE) {literal} NAMES
// General delimiters given explicitly are translated to the document
// character set and stored in the syntax; setRefDelimGeneral() is then
// called for the syntax as a whole.  Short reference delimiters are
// handled the same way, optionally starting from the reference set.
// The terminating NAMES keyword is consumed here.
// Returns 0 on a parameter error; semantic errors are reported through
// message() and may clear sdBuilder.valid.
Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDELIM),
		    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
		    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
		    parm))
    return 0;
  // Tracks which general delimiters were given explicitly so that a
  // repeated role can be diagnosed.
  PackedBoolean delimGeneralSpecified[Syntax::nDelimGeneral];
  for (int i = 0; i < Syntax::nDelimGeneral; i++)
    delimGeneralSpecified[i] = 0;
  for (;;) {
    if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName,
				      SdParam::reservedName + Sd::rSHORTREF),
		      parm))
      return 0;
    if (parm.type == SdParam::reservedName + Sd::rSHORTREF)
      break;
    // Save the role now; the next parseSdParam() call reuses parm.
    Syntax::DelimGeneral delimGeneral = parm.delimGeneralIndex;
    if (delimGeneralSpecified[delimGeneral])
      message(Messages::duplicateDelimGeneral,
	      StringMessageArg(sd().generalDelimiterName(delimGeneral)));
    if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral), parm))
      return 0;
    CString str;
    if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
      // NOTE(review): subst() presumably rewrites str[i] in place using
      // the general substitution table -- confirm against SubstTable.
      const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
      for (size_t i = 0; i < str.length(); i++)
	table->subst(str[i]);
      // A duplicate is still validated, but only the first definition
      // is stored; either failure marks the declaration invalid.
      if (checkGeneralDelim(*sdBuilder.syntax, str)
	  && !delimGeneralSpecified[delimGeneral])
	sdBuilder.syntax->setDelimGeneral(delimGeneral, str);
      else
	sdBuilder.valid = 0;
    }
    delimGeneralSpecified[delimGeneral] = 1;
  }
  ISet<WideChar> missing;
  // Default-constructed switcher: no character switches are applied.
  CharSwitcher switcher;
  if (!setRefDelimGeneral(*sdBuilder.syntax,
			  sdBuilder.sd->docCharset(),
			  switcher,
			  missing))
    sdBuilder.valid = 0;
  if (!missing.isEmpty()) {
    message(Messages::missingSignificant646, CharsetMessageArg(missing));
    sdBuilder.valid = 0;
  }
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
				    SdParam::reservedName + Sd::rNONE),
		    parm))
    return 0;
  if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
    // This switcher shadows the one declared above.
    // NOTE(review): `missing` is passed again here but is not examined
    // afterwards within this function.
    CharSwitcher switcher;
    addRefDelimShortref(*sdBuilder.syntax,
			sdBuilder.sd->docCharset(),
			switcher,
			missing);
  }
  // Additional short reference delimiters, up to the NAMES keyword.
  for (;;) {
    if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
				      SdParam::reservedName + Sd::rNAMES),
		      parm))
      return 0;
    if (parm.type != SdParam::paramLiteral)
      break;
    CString str;
    if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
      const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
      for (size_t i = 0; i < str.length(); i++)
	table->subst(str[i]);
      if (checkShortrefDelim(*sdBuilder.syntax, sdBuilder.sd->docCharset(),
			     str)) {
	// Receives the index from lookupShortref(); the value itself is
	// not used here.
	int i;
	if (sdBuilder.syntax->lookupShortref(str, &i))
	  message(Messages::duplicateDelimShortref,
		  StringMessageArg(str));
	else
	  sdBuilder.syntax->addDelimShortref(str);
      }
    }
  }
  return 1;
}

// Parse the reserved-name replacements of a concrete syntax:
//   SGMLREF {reference-reserved-name new-name} QUANTITY
// Each replacement is rejected if the new name is itself a reference
// reserved name or is already in use in the syntax being built.  After
// the list, setRefNames() is called, the RE/RS/SPACE names are checked
// against the declared function names, and the standard function names
// are entered.  The terminating QUANTITY keyword is consumed here.
// Returns 0 on a parameter error.
Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
                    parm))
    return 0;

  for (;;) {
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY,
                                      SdParam::referenceReservedName),
                      parm))
      return 0;
    if (parm.type == SdParam::reservedName + Sd::rQUANTITY)
      break;

    // Remember which name is being replaced before parm is reused.
    const Syntax::ReservedName target = parm.reservedNameIndex;
    if (!parseSdParam(AllowedSdParams(SdParam::name), parm))
      return 0;

    Syntax::ReservedName existing;
    if (syntax().lookupReservedName(parm.token, &existing)) {
      message(Messages::nameReferenceReservedName,
              StringMessageArg(parm.token));
      continue;
    }

    CString docName;
    if (!translateName(sdBuilder, parm.token, docName))
      continue;

    if (sdBuilder.syntax->lookupReservedName(docName, &existing)) {
      message(Messages::duplicateReservedName,
              StringMessageArg(docName));
      continue;
    }

    if (options().warnSgmlDecl) {
      // Check that it's a valid name in the declared syntax
      // (- and . might not be name characters).  The first character
      // is not examined.
      size_t pos = 1;
      while (pos < docName.length()
             && sdBuilder.syntax->isNameCharacter(docName[pos]))
        pos++;
      if (pos < docName.length())
        message(Messages::reservedNameSyntax,
                StringMessageArg(docName));
    }
    sdBuilder.syntax->setName(target, docName);
  }

  setRefNames(*sdBuilder.syntax, sdBuilder.sd->docCharset());

  // The names used for RE, RS and SPACE must not clash with a declared
  // function name.
  static Syntax::ReservedName standardFunctionNames[3] = {
    Syntax::rRE, Syntax::rRS, Syntax::rSPACE
  };
  for (int fn = 0; fn < 3; fn++) {
    const CString &fnName
      = sdBuilder.syntax->reservedName(standardFunctionNames[fn]);
    Char unused;
    if (sdBuilder.syntax->lookupFunctionChar(fnName, &unused))
      message(Messages::duplicateFunctionName, StringMessageArg(fnName));
  }
  sdBuilder.syntax->enterStandardFunctionNames();
  return 1;
}

// Parse the quantity set of a concrete syntax:
//   SGMLREF {quantity-name number} FEATURES
// Each listed quantity is stored in the syntax being built.  With
// SCOPE INSTANCE, every quantity that ends up smaller than in the
// current syntax is reported.  The terminating FEATURES keyword is
// consumed here.  Returns 0 on a parameter error.
Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
                    parm))
    return 0;

  while (parseSdParam(AllowedSdParams(SdParam::quantityName,
                                      SdParam::reservedName + Sd::rFEATURES),
                      parm)) {
    if (parm.type == SdParam::reservedName + Sd::rFEATURES) {
      if (sdBuilder.sd->scopeInstance()) {
        for (int q = 0; q < Syntax::nQuantity; q++) {
          const Syntax::Quantity which = Syntax::Quantity(q);
          if (sdBuilder.syntax->quantity(which) < syntax().quantity(which))
            message(Messages::scopeInstanceQuantity,
                    StringMessageArg(sd().quantityName(which)));
        }
      }
      return 1;
    }
    // Save the index before the number overwrites parm.
    const Syntax::Quantity target = parm.quantityIndex;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;
    sdBuilder.syntax->setQuantity(target, parm.n);
  }
  return 0;
}

// Parse the feature-use part of the SGML declaration.
// The keywords must appear in the fixed order given by the table
// below.  Group headings (MINIMIZE, LINK, OTHER) take no argument;
// boolean features take NO or YES; number features take NO or
// "YES number".  Values are stored in the Sd in order of appearance,
// with independent running indices for boolean and number features.
// Returns 0 on a parameter error.
Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
{
  struct FeatureInfo {
    Sd::ReservedName name;
    enum {
      none,
      boolean,
      number
    } arg;
  };
  static FeatureInfo features[] = {
    { Sd::rMINIMIZE, FeatureInfo::none },
    { Sd::rDATATAG, FeatureInfo::boolean },
    { Sd::rOMITTAG, FeatureInfo::boolean },
    { Sd::rRANK, FeatureInfo::boolean },
    { Sd::rSHORTTAG, FeatureInfo::boolean },
    { Sd::rLINK, FeatureInfo::none },
    { Sd::rSIMPLE, FeatureInfo::number },
    { Sd::rIMPLICIT, FeatureInfo::boolean },
    { Sd::rEXPLICIT, FeatureInfo::number },
    { Sd::rOTHER, FeatureInfo::none },
    { Sd::rCONCUR, FeatureInfo::number },
    { Sd::rSUBDOC, FeatureInfo::number },
    { Sd::rFORMAL, FeatureInfo::boolean }
  };
  const int noParam = SdParam::reservedName + Sd::rNO;
  const int yesParam = SdParam::reservedName + Sd::rYES;
  int nextBoolean = 0;
  int nextNumber = 0;

  for (int f = 0; f < SIZEOF(features); f++) {
    const FeatureInfo &feature = features[f];
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + feature.name),
                      parm))
      return 0;
    if (feature.arg == FeatureInfo::none)
      continue;

    if (!parseSdParam(AllowedSdParams(noParam, yesParam), parm))
      return 0;
    const Boolean enabled = (parm.type == yesParam);
    if (enabled && feature.name == Sd::rDATATAG)
      message(Messages::datatagNotImplemented);

    if (feature.arg == FeatureInfo::boolean) {
      sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(nextBoolean++),
                                      enabled);
      continue;
    }

    // Number feature: YES is followed by the value, NO means 0.
    Number value = 0;
    if (enabled) {
      if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
        return 0;
      value = parm.n;
    }
    sdBuilder.sd->setNumberFeature(Sd::NumberFeature(nextNumber++), value);
  }
  return 1;
}

// Parse "APPINFO (NONE | minimum-literal)" and pass an AppinfoEvent to
// the event handler.  The event's location is the one current just
// after the APPINFO keyword has been read.  Returns 0 on a parameter
// error.
Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO),
                    parm))
    return 0;
  Location appinfoLocation(currentLocation());
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
                                    SdParam::minimumLiteral),
                    parm))
    return 0;

  AppinfoEvent *appinfo
    = (parm.type == SdParam::minimumLiteral
       ? new (eventAllocator()) AppinfoEvent(parm.literalText,
                                             appinfoLocation)
       : new (eventAllocator()) AppinfoEvent(appinfoLocation));
  eventHandler().appinfo(appinfo);
  return 1;
}


// Translate one character of the syntax-reference character set into
// the document character set.
// The syntax charset declaration is consulted first: the information
// it yields for syntaxChar is looked up in the document charset
// declaration, and the first resulting character is used if it fits in
// a Char.  Otherwise the translation goes through the universal
// character number.  On failure sdBuilder.valid is cleared, a message
// is issued, and 0 is returned; docChar is only set on success.
Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
                                WideChar syntaxChar, Char &docChar)
{
  Number declNumber;
  CString declString;
  CharsetDeclRange::Type declType;
  const PublicId *declId;
  if (sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
                                              declId,
                                              declType,
                                              declNumber,
                                              declString)) {
    ISet<WideChar> candidates;
    if (declType == CharsetDeclRange::string)
      sdBuilder.sd->docCharsetDecl().stringToChar(declString, candidates);
    else if (declType == CharsetDeclRange::number)
      sdBuilder.sd->docCharsetDecl().numberToChar(declId, declNumber,
                                                  candidates);
    else if (declType != CharsetDeclRange::unused) {
      CANNOT_HAPPEN();
    }

    if (!candidates.isEmpty()) {
      if (options().warnSgmlDecl && !candidates.isSingleton())
        message(Messages::ambiguousDocCharacter,
                CharsetMessageArg(candidates));
      // Take the first character of the first range.
      ISetIter<WideChar> ranges(candidates);
      WideChar first, last;
      if (ranges.next(first, last) && first <= Char(-1)) {
        docChar = Char(first);
        return 1;
      }
    }
  }

  // Fall back to translating via the universal character number.
  UnivChar univ;
  if (!sdBuilder.syntaxCharset.descToUniv(syntaxChar, univ)
      || !univToDescCheck(sdBuilder.sd->docCharset(), univ, docChar)) {
    sdBuilder.valid = 0;
    message(Messages::translateSyntaxChar, NumberMessageArg(syntaxChar));
    return 0;
  }
  return 1;
}

// Translate a string of syntax-reference characters into the document
// character set.  docString is rebuilt from scratch; characters that
// cannot be translated are left out (the single-character overload
// reports them).  Returns 1 only if every character was translated.
Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
                                const String<SyntaxChar> &syntaxString,
                                CString &docString)
{
  Boolean allTranslated = 1;
  docString.clear();
  size_t pos = 0;
  while (pos < syntaxString.length()) {
    Char translated;
    if (!translateSyntax(sdBuilder, syntaxString[pos], translated))
      allTranslated = 0;
    else
      docString += translated;
    pos++;
  }
  return allTranslated;
}

// Translate a name from the current document character set (sd()) to
// the document character set of the declaration being built
// (sdBuilder.sd), going through universal character numbers.
// Stops at the first character that cannot be translated: a message is
// issued, sdBuilder.valid is cleared and 0 is returned.
Boolean Parser::translateName(SdBuilder &sdBuilder,
                              const CString &name,
                              CString &str)
{
  str.init(name.length());
  size_t pos = 0;
  while (pos < name.length()) {
    UnivChar univ;
    Boolean known = sd().docCharset().descToUniv(name[pos], univ);
    ASSERT(known != 0);
    if (univToDescCheck(sdBuilder.sd->docCharset(), univ, str[pos])) {
      pos++;
      continue;
    }
    message(Messages::translateDocChar, NumberMessageArg(univ));
    sdBuilder.valid = 0;
    return 0;
  }
  return 1;
}

// Return 1 if c is not yet a function character in syn; otherwise
// report Messages::oneFunction and return 0.
Boolean Parser::checkNotFunction(const Syntax &syn, Char c)
{
  if (!syn.charSet(Syntax::functionChar)->contains(c))
    return 1;
  message(Messages::oneFunction, NumberMessageArg(c));
  return 0;
}


// Validate a short reference delimiter: it may contain at most one
// B sequence (a run of the letter 'B'), and that run must not have a
// blank character immediately before or after it.
// Reports the problem and returns 0 if the delimiter is invalid.

Boolean Parser::checkShortrefDelim(const Syntax &syn,
                                   const CharsetInfo &charset,
                                   const CString &delim)
{
  const Char bChar = charset.execToDesc('B');
  const ISet<Char> *blanks = syn.charSet(Syntax::blank);
  const size_t len = delim.length();
  Boolean seenBSequence = 0;

  size_t pos = 0;
  while (pos < len) {
    if (delim[pos] != bChar) {
      pos++;
      continue;
    }
    if (seenBSequence) {
      message(Messages::multipleBSequence, StringMessageArg(delim));
      return 0;
    }
    seenBSequence = 1;
    if (pos > 0 && blanks->contains(delim[pos - 1])) {
      message(Messages::blankAdjacentBSequence,
              StringMessageArg(delim));
      return 0;
    }
    // Advance to the last B of this run.
    while (pos + 1 < len && delim[pos + 1] == bChar)
      pos++;
    if (pos + 1 < len && blanks->contains(delim[pos + 1])) {
      message(Messages::blankAdjacentBSequence,
              StringMessageArg(delim));
      return 0;
    }
    pos++;
  }
  return 1;
}

// A non-empty general delimiter must contain at least one character
// that is not a function character.  Reports
// Messages::generalDelimAllFunction and returns 0 otherwise.  An empty
// delimiter is accepted.
Boolean Parser::checkGeneralDelim(const Syntax &syn, const CString &delim)
{
  const ISet<Char> *functionChars = syn.charSet(Syntax::functionChar);
  if (delim.length() == 0)
    return 1;
  for (size_t pos = 0; pos < delim.length(); pos++) {
    if (!functionChars->contains(delim[pos]))
      return 1;
  }
  message(Messages::generalDelimAllFunction,
          StringMessageArg(delim));
  return 0;
}

// A character switch may not involve a digit, a lower-case letter or
// an upper-case letter.  Reports Messages::switchLetterDigit and
// returns 0 if univChar is one of those; returns 1 otherwise.
Boolean Parser::checkSwitch(UnivChar univChar)
{
  const Boolean isLcletter
    = (UnivCharsetDesc::a <= univChar
       && univChar < UnivCharsetDesc::a + 26);
  const Boolean isUcletter
    = (UnivCharsetDesc::A <= univChar
       && univChar < UnivCharsetDesc::A + 26);
  const Boolean isDigit
    = (UnivCharsetDesc::zero <= univChar
       && univChar < UnivCharsetDesc::zero + 10);

  if (!isLcletter && !isUcletter && !isDigit)
    return 1;
  message(Messages::switchLetterDigit, NumberMessageArg(univChar));
  return 0;
}

// Check that a character may be declared as an additional name (start)
// character.  univChar is tested against the letter and digit ranges;
// docChar is tested against RE, RS, SPACE and the separator characters
// of syn.  The first violation found is reported (always quoting
// docChar) and 0 is returned; 1 means the character is acceptable.
Boolean Parser::checkNmchar(const Syntax &syn, UnivChar univChar, Char docChar)
{
  const Boolean isLcletter
    = (UnivCharsetDesc::a <= univChar
       && univChar < UnivCharsetDesc::a + 26);
  if (isLcletter) {
    message(Messages::nmcharLcletter, NumberMessageArg(docChar));
    return 0;
  }
  const Boolean isUcletter
    = (UnivCharsetDesc::A <= univChar
       && univChar < UnivCharsetDesc::A + 26);
  if (isUcletter) {
    message(Messages::nmcharUcletter, NumberMessageArg(docChar));
    return 0;
  }
  const Boolean isDigit
    = (UnivCharsetDesc::zero <= univChar
       && univChar < UnivCharsetDesc::zero + 10);
  if (isDigit) {
    message(Messages::nmcharDigit, NumberMessageArg(docChar));
    return 0;
  }

  Char standardChar;
  if (syn.getStandardFunction(Syntax::fRE, standardChar)
      && standardChar == docChar) {
    message(Messages::nmcharRe, NumberMessageArg(docChar));
    return 0;
  }
  if (syn.getStandardFunction(Syntax::fRS, standardChar)
      && standardChar == docChar) {
    message(Messages::nmcharRs, NumberMessageArg(docChar));
    return 0;
  }
  if (syn.getStandardFunction(Syntax::fSPACE, standardChar)
      && standardChar == docChar) {
    message(Messages::nmcharSpace, NumberMessageArg(docChar));
    return 0;
  }

  if (!syn.charSet(Syntax::sepchar)->contains(docChar))
    return 1;
  message(Messages::nmcharSepchar, NumberMessageArg(docChar));
  return 0;
}

void Parser::checkSyntaxNamelen(const Syntax &syn)
{
  size_t namelen = syn.namelen();
  int i;
  for (i = 0; i < Syntax::nDelimGeneral; i++)
    if (syn.delimGeneral(i).length() > namelen)
      message(Messages::delimiterLength,
	      StringMessageArg(syn.delimGeneral(i)),
	      NumberMessageArg(namelen));
  for (i = 0; i < syn.nDelimShortref(); i++)
    if (syn.delimShortref(i).length() > namelen)
      message(Messages::delimiterLength,
	      StringMessageArg(syn.delimShortref(i)),
	      NumberMessageArg(namelen));
  for (i = 0; i < Syntax::nNames; i++)
    if (syn.reservedName(Syntax::ReservedName(i)).length() > namelen
	&& options().warnSgmlDecl)
      message(Messages::reservedNameLength,
	      StringMessageArg(syn.reservedName(Syntax::ReservedName(i))),
	      NumberMessageArg(namelen));
}

// Convert a universal character number to a character of charset.
// If univToDesc() reports more than one match, a warning is issued
// (when warnSgmlDecl is set) and the character it returned is used.
// Returns 0 if there is no match or the character does not fit in a
// Char; `to` is only written on success.
Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
                                Char &to)
{
  WideChar found;
  ISet<WideChar> allMatches;
  const unsigned nMatches = charset.univToDesc(from, found, allMatches);
  if (nMatches == 0)
    return 0;
  if (nMatches > 1 && options().warnSgmlDecl)
    message(Messages::ambiguousDocCharacter, CharsetMessageArg(allMatches));
  if (found > Char(-1))
    return 0;
  to = Char(found);
  return 1;
}

// Read the next parameter of the SGML declaration.
//
// Tokens are read in mdMode.  Separators (tokenS) and comments are
// skipped; the first other token either yields a parameter permitted
// by `allow` or is reported as an error.
//
// On success returns 1 with parm.type set and the matching payload
// filled in: literalText (minimumLiteral), paramLiteralText
// (paramLiteral), token plus one of the index members (the various
// name kinds), or n (number).  On failure a message has been issued
// and 0 is returned.
Boolean Parser::parseSdParam(const AllowedSdParams &allow,
			     SdParam &parm)
{
  for (;;) {
    // Within the switch, `break` means "keep scanning for a parameter".
    Token token = getToken(mdMode);
    switch (token) {
    case tokenUnrecognized:
      // If reportNonSgmlCharacter() returns true, scanning continues;
      // otherwise the character is reported here.
      if (reportNonSgmlCharacter())
	break;
      {
	message(Messages::markupDeclarationCharacter,
		StringMessageArg(currentToken()),
		AllowedSdParamsMessageArg(allow, sdPointer()));
      }
      return 0;
    case tokenEe:
      message(Messages::sdEntityEnd);
      return 0;
    case tokenS:
      break;
    case tokenCom:
      if (!parseComment(sdcomMode))
	return 0;
      break;
    // Tokens that are never valid as a declaration parameter.
    case tokenDso:
    case tokenGrpo:
    case tokenMinusGrpo:
    case tokenPlusGrpo:
    case tokenRni:
    case tokenLcUcNmchar:
    case tokenPeroNameStart:
    case tokenPeroGrpo:
      sdParamInvalidToken(token, allow);
      return 0;
    case tokenLita:
    case tokenLit:
      {
	// A minimum literal takes precedence over a parameter literal
	// when both are allowed.
	Boolean lita = (token == tokenLita);
	if (allow.param(SdParam::minimumLiteral)) {
	  if (!parseMinimumLiteral(lita, parm.literalText))
	    return 0;
	  parm.type = SdParam::minimumLiteral;
	}
	else if (allow.param(SdParam::paramLiteral)) {
	  if (!parseSdParamLiteral(lita, parm.paramLiteralText))
	    return 0;
	  parm.type = SdParam::paramLiteral;
	}
	else {
	  sdParamInvalidToken(token, allow);
	  return 0;
	}
	return 1;
      }
    case tokenMdc:
      if (allow.param(SdParam::mdc)) {
	parm.type = SdParam::mdc;
	return 1;
      }
      sdParamInvalidToken(tokenMdc, allow);
      return 0;
    case tokenNameStart:
      {
	// Read the whole name (limited by the current NAMELEN) through
	// the general substitution table, then classify it.  The order
	// of the lookups below decides which kind wins when a name
	// would match more than one allowed kind.
	extendNameToken(syntax().namelen(), Messages::nameLength);
	getCurrentToken(syntax().generalSubstTable(), parm.token);
	if (allow.param(SdParam::capacityName)) {
	  if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) {
	    parm.type = SdParam::capacityName;
	    return 1;
	  }
	}
	if (allow.param(SdParam::referenceReservedName)) {
	  if (syntax().lookupReservedName(parm.token,
					  &parm.reservedNameIndex)) {
	    parm.type = SdParam::referenceReservedName;
	    return 1;
	  }
	}
	if (allow.param(SdParam::generalDelimiterName)) {
	  if (sd().lookupGeneralDelimiterName(parm.token,
					      parm.delimGeneralIndex)) {
	    parm.type = SdParam::generalDelimiterName;
	    return 1;
	  }
	}
	if (allow.param(SdParam::quantityName)) {
	  if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
	    parm.type = SdParam::quantityName;
	    return 1;
	  }
	}
	// Specific declaration keywords: every allowed type at or above
	// SdParam::reservedName encodes an Sd reserved name.
	for (int i = 0;; i++) {
	  SdParam::Type t = allow.get(i);
	  if (t == SdParam::invalid)
	    break;
	  if (t >= SdParam::reservedName) {
	    if (parm.token == sd().reservedName(t - SdParam::reservedName)) {
	      parm.type = t;
	      return 1;
	    }
	  }
	}
	// Anything else is accepted only where an arbitrary name is
	// allowed.
	if (allow.param(SdParam::name)) {
	  parm.type = SdParam::name;
	  return 1;
	}
	{
	  message(Messages::sdInvalidName,
		  StringMessageArg(parm.token),
		  AllowedSdParamsMessageArg(allow, sdPointer()));
	}
	return 0;
      }
    case tokenDigit:
      if (allow.param(SdParam::number)) {
	extendNumber(syntax().namelen(), Messages::numberLength);
	parm.type = SdParam::number;
	unsigned long n;
	// A value that overflows or does not fit in Number is reported
	// and replaced by Number(-1); parsing still succeeds.
	if (!stringToNumber(currentInput()->currentTokenStart(),
			    currentInput()->currentTokenLength(),
			    n)
	    || n > Number(-1)) {
	  message(Messages::numberTooBig,
		  StringMessageArg(currentToken()));
	  parm.n = Number(-1);
	}
	else
	  parm.n = Number(n);
	// Look one token ahead: a name start directly after the number
	// is reported with psRequired.  The token is then pushed back.
	Token token = getToken(mdMode);
	if (token == tokenNameStart)
	  message(Messages::psRequired);
	currentInput()->ungetToken();
	return 1;
      }
      sdParamInvalidToken(tokenDigit, allow);
      return 0;
    default:
      CANNOT_HAPPEN();
    }
  }
}

// This is a separate function, because we might want SyntaxChar
// to be bigger than Char.
//
// Parse a parameter literal of the SGML declaration into str.
// `lita` selects the tokenizing mode (sdplitaMode when true,
// sdplitMode otherwise).  Numeric character references contribute the
// referenced number as one SyntaxChar.  Returns 0 on an unrecoverable
// error; a literal longer than the reference LITLEN is reported but
// still returned with 1.

Boolean Parser::parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str)
{
  str.clear();
  const int refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);

  Mode mode = lita ? sdplitaMode : sdplitMode;
  for (;;) {
    Token token = getToken(mode);
    switch (token) {
    case tokenEe:
      message(Messages::literalLevel);
      return 0;
    case tokenUnrecognized:
      if (reportNonSgmlCharacter())
	break;
      // Reported, but the character is still kept in the literal.
      message(Messages::sdLiteralSignificant,
	      StringMessageArg(currentToken()));
      str += currentChar();
      break;
    case tokenCroDigit:
      {
	// Numeric character reference: drop the part of the token
	// already matched, read the digits, and append the number if it
	// fits in a SyntaxChar.
	InputSource *in = currentInput();
	in->discardInitial();
	extendNumber(syntax().namelen(), Messages::numberLength);
	unsigned long n;
	if (!stringToNumber(in->currentTokenStart(),
			    in->currentTokenLength(),
			    n)
	    || n > SyntaxChar(-1))
	  message(Messages::syntaxCharacterNumber,
		  StringMessageArg(currentToken()));
	else
	  str += SyntaxChar(n);
	// NOTE(review): presumably consumes the reference terminator --
	// confirm against refMode.
	(void)getToken(refMode);
      }
      break;
    case tokenCroNameStart:
      if (!parseNamedCharRef())
	return 0;
      break;
    case tokenLit:
    case tokenLita:
      // Closing delimiter.
      goto done;
    case tokenPeroNameStart:
    case tokenPeroGrpo:
      // Reported, then the token's characters are appended to the
      // literal as they stand.
      message(Messages::sdParameterEntity);
      {
	const Char *p = currentInput()->currentTokenStart();
	for (size_t count = currentInput()->currentTokenLength();
	     count > 0;
	     count--)
	  str += *p++;
      }
      break;
    case tokenChar:
      // An RE once the literal is already more than twice the
      // reference LITLEN is treated as a sign of a missing closing
      // delimiter.
      if (str.length() > refLitlen*2
	  && currentChar() == syntax().standardFunction(Syntax::fRE)) {
	message(Messages::parameterLiteralLength, NumberMessageArg(refLitlen));
	// guess that the closing delimiter has been omitted
	message(Messages::literalClosingDelimiter);
	return 0;
      }
      str += currentChar();
      break;
    }
  }
 done:
  if (str.length() > refLitlen)
    message(Messages::parameterLiteralLength, NumberMessageArg(refLitlen));
  return 1;
}

// Convert `length` digit characters starting at s into an unsigned
// long, using sd().digitWeight() for the value of each digit.
// Returns 0, leaving result untouched, if the value would exceed
// ULONG_MAX; otherwise stores the value in result and returns 1.
Boolean Parser::stringToNumber(const Char *s, size_t length,
                               unsigned long &result)
{
  unsigned long acc = 0;
  const Char *end = s + length;
  for (const Char *p = s; p != end; p++) {
    int digit = sd().digitWeight(*p);
    // Guard the multiplication, then the addition, against overflow.
    if (acc > ULONG_MAX/10)
      return 0;
    acc *= 10;
    if (acc > ULONG_MAX - digit)
      return 0;
    acc += digit;
  }
  result = acc;
  return 1;
}

// Report a token that is not valid at this point of the SGML
// declaration, together with the list of parameters that were allowed.
void Parser::sdParamInvalidToken(Token token,
                                 const AllowedSdParams &allow)
{
  TokenMessageArg offending(token, mdMode, syntaxPointer(), sdPointer());
  AllowedSdParamsMessageArg expected(allow, sdPointer());
  message(Messages::sdParamInvalidToken, offending, expected);
}

// Record up to six allowed parameter types, in the order given.
AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2,
                                 SdParam::Type arg3, SdParam::Type arg4,
                                 SdParam::Type arg5, SdParam::Type arg6)
{
  const SdParam::Type given[6] = { arg1, arg2, arg3, arg4, arg5, arg6 };
  for (int slot = 0; slot < 6; slot++)
    allow_[slot] = given[slot];
}

// Return 1 if t is one of the allowed types.  The search stops at the
// first SdParam::invalid entry, so SdParam::invalid itself never
// matches.
Boolean AllowedSdParams::param(SdParam::Type t) const
{
  for (int slot = 0; slot < maxAllow; slot++) {
    if (allow_[slot] == SdParam::invalid)
      break;
    if (allow_[slot] == t)
      return 1;
  }
  return 0;
}

// Return the i-th allowed type, or SdParam::invalid when i is outside
// the range [0, maxAllow).
SdParam::Type AllowedSdParams::get(int i) const
{
  if (i >= 0 && i < maxAllow)
    return allow_[i];
  return SdParam::Type(SdParam::invalid);
}

// Message argument that lists the parameters allowed at some point of
// the SGML declaration.  Keeps a copy of the allowed list and a
// pointer to the Sd, which append() uses to obtain reserved names and
// the document-charset form of '>'.
AllowedSdParamsMessageArg::AllowedSdParamsMessageArg(
  const AllowedSdParams &allow,
  const ConstResourcePointer<Sd> &sd)
: allow_(allow), sd_(sd)
{
}

// Return a heap-allocated copy of this argument.
MessageArg *AllowedSdParamsMessageArg::copy() const
{
  MessageArg *duplicate = new AllowedSdParamsMessageArg(*this);
  return duplicate;
}

// Append a description of every allowed parameter to builder, with
// Fragments::listSep between entries.  Generic kinds are rendered with
// a message fragment, the MDC delimiter as '>' (in the document
// character set) between delimStart and delimEnd, and any other type
// as the text of the corresponding Sd reserved name.
void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const
{
  int index = 0;
  SdParam::Type type;
  while ((type = allow_.get(index)) != SdParam::invalid) {
    if (index > 0)
      builder.appendFragment(Fragments::listSep);
    switch (type) {
    case SdParam::number:
      builder.appendFragment(Fragments::number);
      break;
    case SdParam::name:
      builder.appendFragment(Fragments::name);
      break;
    case SdParam::minimumLiteral:
      builder.appendFragment(Fragments::minimumLiteral);
      break;
    case SdParam::paramLiteral:
      builder.appendFragment(Fragments::parameterLiteral);
      break;
    case SdParam::capacityName:
      builder.appendFragment(Fragments::capacityName);
      break;
    case SdParam::quantityName:
      builder.appendFragment(Fragments::quantityName);
      break;
    case SdParam::generalDelimiterName:
      builder.appendFragment(Fragments::generalDelimiteRoleName);
      break;
    case SdParam::referenceReservedName:
      builder.appendFragment(Fragments::referenceReservedName);
      break;
    case SdParam::mdc:
      {
        builder.appendFragment(Fragments::delimStart);
        Char mdcChar = sd_->execToDoc('>');
        builder.appendChars(&mdcChar, 1);
        builder.appendFragment(Fragments::delimEnd);
      }
      break;
    default:
      {
        // A specific declaration keyword.
        CString keyword(sd_->reservedName(type - SdParam::reservedName));
        builder.appendChars(keyword.pointer(), keyword.length());
      }
      break;
    }
    index++;
  }
}

// A declaration under construction starts out valid.
SdBuilder::SdBuilder()
{
  valid = 1;
}

void SdBuilder::addFormalError(const Location &location,
			       Messages::Type1 message,
			       const CString &id)
{
  formalErrorList.insert(new SdFormalError(location, message, id));
}

// Store everything needed to issue the message later through send():
// the location, the message type, and the identifier used as the
// message argument.
SdFormalError::SdFormalError(const Location &location,
			     Messages::Type1 message,
			     const CString &id)
: location_(location),
  message_(message),
  id_(id)
{
}

// Issue the stored message on parser, at the stored location, with the
// stored identifier as its argument.
void SdFormalError::send(ParserState &parser)
{
  StringMessageArg idArg(id_);
  parser.message(location_, message_, idArg);
}

// A switcher with no switch table: subst() returns its argument
// unchanged and nSwitches() is 0.
CharSwitcher::CharSwitcher()
{
  switches_ = 0;
}

// Use `switches` as the switch table.  subst() reads it as consecutive
// (from, to) pairs; one "used" flag is kept per pair, initially clear.
// The vector is not copied, only pointed to.
CharSwitcher::CharSwitcher(const VectorBase<UnivChar> *switches)
: switchUsed_(switches->length() / 2),
  switches_(switches)
{
  size_t pair = switchUsed_.length();
  while (pair > 0) {
    pair--;
    switchUsed_[pair] = 0;
  }
}

// Return the replacement for c from the first pair whose "from" entry
// equals c, marking that pair as used.  If there is no table or no
// matching pair, c is returned unchanged.
UnivChar CharSwitcher::subst(UnivChar c)
{
  if (!switches_)
    return c;
  for (size_t from = 0; from < switches_->length(); from += 2) {
    if ((*switches_)[from] != c)
      continue;
    switchUsed_[from/2] = 1;
    return (*switches_)[from + 1];
  }
  return c;
}

// Number of switch pairs (one "used" flag is kept per pair).
size_t CharSwitcher::nSwitches() const
{
  const size_t pairCount = switchUsed_.length();
  return pairCount;
}

// Whether subst() has applied the i-th switch pair.
Boolean CharSwitcher::switchUsed(size_t i) const
{
  Boolean used = switchUsed_[i];
  return used;
}

// The "from" character of the i-th switch pair.  Dereferences the
// switch table unconditionally, so it must only be called on a
// switcher that was constructed with one.
UnivChar CharSwitcher::switchFrom(size_t i) const
{
  const VectorBase<UnivChar> &table = *switches_;
  return table[i*2];
}



// Message argument that renders a set of character numbers; the set is
// copied into the argument.
CharsetMessageArg::CharsetMessageArg(const ISet<WideChar> &set)
: set_(set)
{
}

// Return a heap-allocated copy of this argument.
MessageArg *CharsetMessageArg::copy() const
{
  MessageArg *duplicate = new CharsetMessageArg(*this);
  return duplicate;
}

// Append the character numbers of the set to builder.  Ranges are
// separated by Fragments::listSep.  A single number is written once;
// a range of exactly two numbers is written as a two-element list;
// longer ranges are written as "min rangeSep max".
void CharsetMessageArg::append(MessageBuilder &builder) const
{
  ISetIter<WideChar> ranges(set_);
  WideChar lo, hi;
  Boolean needSeparator = 0;
  while (ranges.next(lo, hi)) {
    if (needSeparator)
      builder.appendFragment(Fragments::listSep);
    needSeparator = 1;
    builder.appendNumber(lo);
    if (hi == lo)
      continue;
    if (hi == lo + 1)
      builder.appendFragment(Fragments::listSep);
    else
      builder.appendFragment(Fragments::rangeSep);
    builder.appendNumber(hi);
  }
}
