// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

#ifndef Syntax_INCLUDED
#define Syntax_INCLUDED 1
#ifdef __GNUG__
#pragma interface
#endif

#include "types.H"
#include "Boolean.H"
#include "ISet.H"
#include "CString.H"
#include "SubstTable.H"
#include "HashTable.H"
#include "GrowableVectorD.H"
#include "Resource.H"
#include "XcharMap.H"

class Sd;
class CharsetInfo;

// Syntax bundles the tables that describe one syntax: reserved names,
// quantity limits, general and short reference delimiters, function
// characters, character sets and name-case substitution tables.
// Simple accessors are defined inline after the class; the remaining
// members are defined out of line.
// Copying is disabled: the copy constructor and assignment operator are
// private and left undefined.
class Syntax : public Resource {
public:
  // Identifies a reserved name.  reservedName() uses the value as an
  // index into names_, so the enumerators must stay contiguous and in
  // step with the size of names_.
  enum ReservedName {
    rANY,
    rATTLIST,
    rCDATA,
    rCONREF,
    rCURRENT,
    rDEFAULT,
    rDOCTYPE,
    rELEMENT,
    rEMPTY,
    rENDTAG,
    rENTITIES,
    rENTITY,
    rFIXED,
    rID,
    rIDLINK,
    rIDREF,
    rIDREFS,
    rIGNORE,
    rIMPLIED,
    rINCLUDE,
    rINITIAL,
    rLINK,
    rLINKTYPE,
    rMD,
    rMS,
    rNAME,
    rNAMES,
    rNDATA,
    rNMTOKEN,
    rNMTOKENS,
    rNOTATION,
    rNUMBER,
    rNUMBERS,
    rNUTOKEN,
    rNUTOKENS,
    rO,
    rPCDATA,
    rPI,
    rPOSTLINK,
    rPUBLIC,
    rRCDATA,
    rRE,
    rREQUIRED,
    rRESTORE,
    rRS,
    rSDATA,
    rSHORTREF,
    rSIMPLE,
    rSPACE,
    rSTARTTAG,
    rSUBDOC,
    rSYSTEM,
    rTEMP,
    rUSELINK,
    rUSEMAP
  };
  // Number of reserved names (55); derived from the last enumerator.
  enum { nNames = rUSEMAP + 1 };
  // Identifies a quantity.  quantity() and referenceQuantity() use the
  // value as an index into quantity_ and referenceQuantity_.
  enum Quantity {
    qATTCNT,
    qATTSPLEN,
    qBSEQLEN,
    qDTAGLEN,
    qDTEMPLEN,
    qENTLVL,
    qGRPCNT,
    qGRPGTCNT,
    qGRPLVL,
    qLITLEN,
    qNAMELEN,
    qNORMSEP,
    qPILEN,
    qTAGLEN,
    qTAGLVL
  };
  // Number of quantities (15); used directly as the size of quantity_.
  enum { nQuantity = qTAGLVL + 1 };
  // Identifies a general delimiter.  delimGeneral() takes the value as
  // an index into delimGeneral_.
  enum DelimGeneral {
    dAND,
    dCOM,
    dCRO,
    dDSC,
    dDSO,
    dDTGC,
    dDTGO,
    dERO,
    dETAGO,
    dGRPC,
    dGRPO,
    dLIT,
    dLITA,
    dMDC,
    dMDO,
    dMINUS,
    dMSC,
    dNET,
    dOPT,
    dOR,
    dPERO,
    dPIC,
    dPIO,
    dPLUS,
    dREFC,
    dREP,
    dRNI,
    dSEQ,
    dSTAGO,
    dTAGC,
    dVI
  };
  // Number of general delimiters (31); derived from the last enumerator.
  enum { nDelimGeneral = dVI + 1 };
  // Identifies a standard function character; indexes standardFunction_
  // and standardFunctionValid_ (3 entries each).
  enum StandardFunction {
    fRE,
    fRS,
    fSPACE
  };
  // Class of a function character, as passed to addFunctionChar().
  enum FunctionClass {
    cFUNCHAR,
    cSEPCHAR,
    cMSOCHAR,
    cMSICHAR,
    cMSSCHAR
  };
  // Identifies one of the character sets held in set_; charSet() takes
  // the value as an index.
  enum Set {
    nameStart,
    digit,
    nmchar,			// LCNMCHAR or UCNMCHAR
    s,
    blank,
    sepchar,
    minimumData,
    significant,
    functionChar,		// function character
    sgmlChar
  };
  // Number of character sets (10); derived from the last enumerator.
  enum { nSet = sgmlChar + 1 };
  // Per-character category as stored in categoryTable_ (values are
  // written in octal).  The inline predicates compare table entries
  // with ==, except isNameCharacter(), which tests
  // >= nameStartCategory and therefore depends on nameStartCategory,
  // digitCategory and otherNameCategory being the three largest values.
  enum Category {
    otherCategory = 0,
    sCategory = 01,
    nameStartCategory = 02,
    digitCategory = 04,
    otherNameCategory = 010
    };

  Syntax(const Sd &);
  // Lookups and read-only accessors.  The functions taking an int or an
  // enumerator index the underlying arrays directly (see the inline
  // definitions after the class); no range checking is done there.
  Boolean lookupFunctionChar(const CString &, Char *) const;
  Boolean lookupReservedName(const CString &, ReservedName *) const;
  const CString &reservedName(ReservedName) const;
  CString rniReservedName(ReservedName) const;
  Number quantity(Quantity) const;
  Char standardFunction(int) const;
  Boolean getStandardFunction(int, Char &) const;
  const CString &delim() const;
  const ISet<Char> *charSet(int i) const;
  Char substGeneral(Char) const;
  Char substEntity(Char) const;
  const SubstTable<Char> *generalSubstTable() const;
  const SubstTable<Char> *entitySubstTable() const;
  Boolean namecaseGeneral() const;
  Boolean namecaseEntity() const;
  const CString &delimGeneral(int) const;
  const CString &delimShortref(int) const;
  Boolean lookupShortref(const CString &, int *) const;
  int nDelimShortref() const;
  // Character classification, driven by categoryTable_ and set_.
  Boolean isNameCharacter(Xchar) const;
  Boolean isNameStartCharacter(Xchar) const;
  Boolean isDigit(Xchar) const;
  Boolean isS(Xchar) const;
  Boolean isB(Xchar c) const;
  Category charCategory(Xchar) const;
  Boolean isSgmlChar(Xchar) const;
  // Convenience wrappers around quantity(), returning size_t.
  // penamelen() is qNAMELEN minus the length of the dPERO delimiter.
  size_t attcnt() const;
  size_t attsplen() const;
  size_t namelen() const;
  size_t penamelen() const;
  size_t litlen() const;
  size_t normsep() const;
  size_t dtemplen() const;
  size_t grpcnt() const;
  size_t grpgtcnt() const;
  size_t grplvl() const;
  size_t taglvl() const;
  size_t taglen() const;
  size_t entlvl() const;
  size_t pilen() const;
  // Same as standardFunction(fSPACE).
  Char space() const;
  
  // Mutators.
  void setStandardFunction(StandardFunction, Char);
  void enterStandardFunctionNames();
  void addFunctionChar(const CString &, FunctionClass, Char);
  void setNamecaseGeneral(Boolean);
  void setNamecaseEntity(Boolean);
  void setDelimGeneral(int, const CString &);
  void addDelimShortref(const CString &);
  void addNmcharPair(Char lc, Char uc);
  void addNmstrtPair(Char lc, Char uc);
  void addShunchar(Char);
  void setShuncharControls();
  // The int is used as an index into quantity_ without a range check.
  void setQuantity(int, Number);
  void setName(int, const CString &);
  void setSgmlChar(const ISet<Char> &);
  void implySgmlChar(const CharsetInfo &docCharset);
  void checkSgmlChar(const CharsetInfo &docCharset,
		     const Syntax *otherSyntax,
		     ISet<WideChar> &invalid)
       const;
  // Reads the static referenceQuantity_ table; needs no Syntax object.
  static int referenceQuantity(Quantity);
  const XcharMap<unsigned char> &markupScanTable() const;
  Boolean multicode() const;
private:
  Syntax(const Syntax &);	// undefined
  void operator=(const Syntax &); // undefined
  void subst(Char, Char);
  void checkUnivControlChar(UnivChar univChar,
			    const CharsetInfo &docCharset,
			    const Syntax *otherSyntax,
			    ISet<WideChar> &invalid) const;

  ISet<Char> shunchar_;
  // Set to 1 by setShuncharControls().
  PackedBoolean shuncharControls_;
  // FIXME The compiler in Objectcenter 2.0.0 gets confused if
  // the enumerators are used as array sizes.
  // Because of that, the literal sizes 10, 31 and 55 below must be kept
  // equal to nSet, nDelimGeneral and nNames by hand.
  ISet<Char> set_[/* nSet */ 10];
  // Indexed by StandardFunction; an entry of standardFunction_ is only
  // reported by getStandardFunction() when the matching
  // standardFunctionValid_ entry is set.
  Char standardFunction_[3];
  PackedBoolean standardFunctionValid_[3];
  Boolean namecaseGeneral_;
  Boolean namecaseEntity_;
  CString delimGeneral_[/* nDelimGeneral */ 31];
  GrowableVectorD<CString> delimShortref_;
  CString names_[/* nNames */ 55];
  // Unlike the arrays above, this one is sized with the enumerator.
  Number quantity_[nQuantity];
  HashTable<CString,int> nameTable_;
  HashTable<CString,Char> functionTable_;
  HashTable<CString,int> shortrefTable_;
  SubstTable<Char> upperSubst_;
  SubstTable<Char> identitySubst_;
  // Returned by generalSubstTable() and entitySubstTable(); presumably
  // each points at upperSubst_ or identitySubst_ -- confirm in the
  // implementation file.
  const SubstTable<Char> *generalSubst_;
  const SubstTable<Char> *entitySubst_;
  // Holds one Category value per character.
  XcharMap<unsigned char> categoryTable_;
  Boolean multicode_;
  XcharMap<unsigned char> markupScanTable_;
  // Indexed by Quantity; defined out of line.
  static const int referenceQuantity_[];
};

// Returns the value currently stored for quantity q.
// q is used directly as an index into quantity_.
inline Number Syntax::quantity(Quantity q) const
{
  const Number value = quantity_[q];
  return value;
}

inline void Syntax::setQuantity(int i, Number n)
{
  quantity_[i] = n;
}

// Returns the substitution table pointer held in generalSubst_.
// The pointer is handed out as stored; ownership stays with the Syntax.
inline const SubstTable<Char> *Syntax::generalSubstTable() const
{
  const SubstTable<Char> *table = generalSubst_;
  return table;
}

// Returns the substitution table pointer held in entitySubst_.
// The pointer is handed out as stored; ownership stays with the Syntax.
inline const SubstTable<Char> *Syntax::entitySubstTable() const
{
  const SubstTable<Char> *table = entitySubst_;
  return table;
}

// Returns the number of short reference delimiters, i.e. the length of
// delimShortref_ converted to int.
inline int Syntax::nDelimShortref() const
{
  const int count = int(delimShortref_.length());
  return count;
}

// Returns a reference to the general delimiter with index i
// (a DelimGeneral value).  i is not range checked.
inline const CString &Syntax::delimGeneral(int i) const
{
  const CString &delimiter = delimGeneral_[i];
  return delimiter;
}

// Returns a reference to the short reference delimiter with index i.
// The index is passed straight to delimShortref_'s subscript operator.
inline const CString &Syntax::delimShortref(int i) const
{
  const CString &delimiter = delimShortref_[i];
  return delimiter;
}

// Returns the character stored for standard function i
// (a StandardFunction value).  Unlike getStandardFunction(), this does
// not consult standardFunctionValid_.
inline Char Syntax::standardFunction(int i) const
{
  const Char ch = standardFunction_[i];
  return ch;
}

// Fetches the character for standard function i if it is marked valid.
// Returns 1 and stores the character in result when
// standardFunctionValid_[i] is set; otherwise returns 0 and leaves
// result untouched.
inline Boolean Syntax::getStandardFunction(int i, Char &result) const
{
  if (!standardFunctionValid_[i])
    return 0;
  result = standardFunction_[i];
  return 1;
}

// Returns a pointer to the character set with index i (a Set value).
// The pointer refers to an element of set_; i is not range checked.
inline const ISet<Char> *Syntax::charSet(int i) const
{
  return set_ + i;
}

// True if c's category value is at least nameStartCategory.  With the
// Category values as declared, that covers nameStartCategory,
// digitCategory and otherNameCategory.
inline Boolean Syntax::isNameCharacter(Xchar c) const
{
  const unsigned char category = categoryTable_[c];
  return category >= nameStartCategory;
}

// True if c's entry in categoryTable_ is exactly nameStartCategory.
inline Boolean Syntax::isNameStartCharacter(Xchar c) const
{
  const unsigned char category = categoryTable_[c];
  return category == nameStartCategory;
}

// True if c's entry in categoryTable_ is exactly digitCategory.
inline Boolean Syntax::isDigit(Xchar c) const
{
  const unsigned char category = categoryTable_[c];
  return category == digitCategory;
}

// True if c's entry in categoryTable_ is exactly sCategory.
inline Boolean Syntax::isS(Xchar c) const
{
  const unsigned char category = categoryTable_[c];
  return category == sCategory;
}

// True if c is in sCategory but is neither the RE nor the RS standard
// function character.
inline Boolean Syntax::isB(Xchar c) const
{
  // Anything outside sCategory is rejected before RE/RS are looked at.
  if (categoryTable_[c] != sCategory)
    return 0;
  return c != standardFunction_[fRE] && c != standardFunction_[fRS];
}

// Returns c's entry in categoryTable_, converted to a Category.
inline Syntax::Category Syntax::charCategory(Xchar c) const
{
  const unsigned char raw = categoryTable_[c];
  return Category(raw);
}

// True if c is non-negative and is a member of the sgmlChar set.
// Negative values are rejected before c is converted to Char.
inline Boolean Syntax::isSgmlChar(Xchar c) const
{
  if (c < 0)
    return 0;
  return set_[sgmlChar].contains(Char(c)) != 0;
}

// Returns a reference to the string stored for reserved name i.
// i is used directly as an index into names_.
inline const CString &Syntax::reservedName(ReservedName i) const
{
  const CString &name = names_[i];
  return name;
}

inline size_t Syntax::attcnt() const
{
  return quantity(Syntax::qATTCNT);
}

inline size_t Syntax::attsplen() const
{
  return quantity(Syntax::qATTSPLEN);
}

inline size_t Syntax::namelen() const
{
  return quantity(Syntax::qNAMELEN);
}

inline size_t Syntax::penamelen() const
{
  return quantity(Syntax::qNAMELEN) - delimGeneral(Syntax::dPERO).length();
}

inline size_t Syntax::litlen() const
{
  return quantity(Syntax::qLITLEN);
}

inline size_t Syntax::normsep() const
{
  return quantity(Syntax::qNORMSEP);
}

inline size_t Syntax::dtemplen() const
{
  return quantity(Syntax::qDTEMPLEN);
}

inline size_t Syntax::grpcnt() const
{
  return quantity(Syntax::qGRPCNT);
}

inline size_t Syntax::grpgtcnt() const
{
  return quantity(Syntax::qGRPGTCNT);
}

inline size_t Syntax::grplvl() const
{
  return quantity(Syntax::qGRPLVL);
}

inline size_t Syntax::taglvl() const
{
  return quantity(Syntax::qTAGLVL);
}

inline size_t Syntax::taglen() const
{
  return quantity(Syntax::qTAGLEN);
}

inline size_t Syntax::entlvl() const
{
  return quantity(Syntax::qENTLVL);
}

inline size_t Syntax::pilen() const
{
  return quantity(Syntax::qPILEN);
}

// Returns the character stored for the fSPACE standard function.
// As with standardFunction(), validity is not checked.
inline Char Syntax::space() const
{
  return standardFunction_[fSPACE];
}

inline void Syntax::setSgmlChar(const ISet<Char> &set)
{
  set_[sgmlChar] = set;
}

// Returns the entry for quantity i from the static referenceQuantity_
// table.  Static: does not depend on any Syntax object.
inline int Syntax::referenceQuantity(Quantity i)
{
  const int value = referenceQuantity_[i];
  return value;
}

inline void Syntax::setShuncharControls()
{
  shuncharControls_ = 1;
}

// Returns a reference to markupScanTable_.  The reference refers to a
// member of this object and so is only valid while the Syntax exists.
inline const XcharMap<unsigned char> &Syntax::markupScanTable() const
{
  const XcharMap<unsigned char> &table = markupScanTable_;
  return table;
}

// Returns the multicode_ flag.
inline Boolean Syntax::multicode() const
{
  const Boolean flag = multicode_;
  return flag;
}

// Returns the namecaseGeneral_ flag (see setNamecaseGeneral()).
inline Boolean Syntax::namecaseGeneral() const
{
  const Boolean flag = namecaseGeneral_;
  return flag;
}

// Returns the namecaseEntity_ flag (see setNamecaseEntity()).
inline Boolean Syntax::namecaseEntity() const
{
  const Boolean flag = namecaseEntity_;
  return flag;
}

#endif /* Syntax_INCLUDED */
