// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

#ifdef __GNUG__
#pragma implementation
#endif
#include <stdlib.h>
#include "ContentToken.H"
#include "macros.H"
#include "ElementType.H"
#include "Vector.H"
#include "Dtd.H"
#include "MessageArg.H"
#include "setAll.H"

// Build an AND group from the member tokens in v; construction is
// delegated entirely to the ModelGroup base.
AndModelGroup::AndModelGroup(VectorBase<Owner<ContentToken> > &v,
                             ContentToken::OccurrenceIndicator oi)
  : ModelGroup(v, oi)
{
  // nothing beyond base-class construction
}

// Report the connector kind joining the members of this group.
// NOTE(review): `and` is an alternative token in ISO C++; this relies on
// the enumerator name declared in the header and on a compiler mode that
// accepts it -- renaming has to start in the header.
ModelGroup::Connector AndModelGroup::connector() const
{
  const ModelGroup::Connector kind = and;
  return kind;
}

// Build an OR group from the member tokens in v, then flag every member
// as belonging to an OR group via setOrGroup().
OrModelGroup::OrModelGroup(VectorBase<Owner<ContentToken> > &v,
                           ContentToken::OccurrenceIndicator oi)
  : ModelGroup(v, oi)
{
  this->setOrGroup();
}

// Report the connector kind joining the members of this group.
// NOTE(review): `or` is an alternative token in ISO C++; this relies on
// the enumerator name declared in the header and on a compiler mode that
// accepts it -- renaming has to start in the header.
ModelGroup::Connector OrModelGroup::connector() const
{
  const ModelGroup::Connector kind = or;
  return kind;
}


// Build a sequence group from the member tokens in v; construction is
// delegated entirely to the ModelGroup base.
SeqModelGroup::SeqModelGroup(VectorBase<Owner<ContentToken> > &v,
                             ContentToken::OccurrenceIndicator oi)
  : ModelGroup(v, oi)
{
  // nothing beyond base-class construction
}

// Report the connector kind joining the members of this group.
ModelGroup::Connector SeqModelGroup::connector() const
{
  const ModelGroup::Connector kind = seq;
  return kind;
}


// Construct a group whose members are taken from v.  members_ is sized
// to match v and each element of v is handed over with moveTo(), so the
// entries of v are given up to this group.
ModelGroup::ModelGroup(VectorBase<Owner<ContentToken> > &v,
                       OccurrenceIndicator oi)
  : ContentToken(oi),
    members_(v.length())
{
  unsigned slot = 0;
  while (slot < v.length()) {
    v[slot].moveTo(members_[slot]);
    ++slot;
  }
}

// Returns 1 (for this group) plus the sum of grpgtcnt() over all members.
unsigned long ModelGroup::grpgtcnt() const
{
  unsigned long total = 1;
  size_t k = 0;
  while (k < members_.length()) {
    total += members_[k]->grpgtcnt();
    ++k;
  }
  return total;
}

// Calls setOrGroupMember() on every direct member of this group.
void ModelGroup::setOrGroup()
{
  size_t k = 0;
  while (k < members_.length()) {
    members_[k]->setOrGroupMember();
    ++k;
  }
}

// A ModelGroup is a model group: hand back the object itself.
const ModelGroup *ModelGroup::asModelGroup() const
{
  const ModelGroup *self = this;
  return self;
}

// Construct a token for the given element type; everything is forwarded
// to the LeafContentToken base.
ElementToken::ElementToken(const ElementType *element, OccurrenceIndicator oi)
  : LeafContentToken(element, oi)
{
  // nothing beyond base-class construction
}

// Record the occurrence indicator; no other member is initialized here.
ContentToken::ContentToken(OccurrenceIndicator oi)
  : occurrenceIndicator_(oi)
{
  // nothing else to set up
}

// Base-class count: a single token contributes 1.  ModelGroup supplies
// its own version that also adds up its members.
unsigned long ContentToken::grpgtcnt() const
{
  const unsigned long single = 1;
  return single;
}

// Base-class version does nothing; LeafContentToken has its own version
// that records the flag.
void ContentToken::setOrGroupMember()
{
  // intentionally empty
}

// Base-class answer: not a model group, so return a null pointer.
const ModelGroup *ContentToken::asModelGroup() const
{
  const ModelGroup *none = 0;
  return none;
}

// Base-class answer: not a leaf token, so return a null pointer.
const LeafContentToken *ContentToken::asLeafContentToken() const
{
  const LeafContentToken *none = 0;
  return none;
}

// Construct a leaf token for `element` (may be null).  The token starts
// out non-final, not an OR-group member, and with no required follow
// transition (requiredIndex_ == size_t(-1)).
// The base class is listed first because it is always constructed before
// any data member, regardless of how the initializers are written.
LeafContentToken::LeafContentToken(const ElementType *element,
                                   OccurrenceIndicator oi)
  : ContentToken(oi),
    element_(element),
    isFinal_(0),
    orGroupMember_(0),
    requiredIndex_(size_t(-1))
{
}

// An ordinary leaf token is not the initial pseudo token.
Boolean LeafContentToken::isInitial() const
{
  const Boolean initial = 0;
  return initial;
}

void LeafContentToken::setOrGroupMember()
{
  orGroupMember_ = 1;
}

// A LeafContentToken is a leaf: hand back the object itself.
const LeafContentToken *LeafContentToken::asLeafContentToken() const
{
  const LeafContentToken *self = this;
  return self;
}

// A PCDATA token is a leaf with no element type (null) and the `rep`
// occurrence indicator.
PcdataToken::PcdataToken()
  : LeafContentToken(0, rep)
{
  // nothing beyond base-class construction
}

// The initial pseudo token is a leaf with no element type (null) and
// the `none` occurrence indicator.
InitialPseudoToken::InitialPseudoToken()
  : LeafContentToken(0, none)
{
  // nothing beyond base-class construction
}

// This is the initial pseudo token, so always answer true.
Boolean InitialPseudoToken::isInitial() const
{
  const Boolean initial = 1;
  return initial;
}

// A data tag group is built exactly like a sequence group over vec.
DataTagGroup::DataTagGroup(VectorBase<Owner<ContentToken> > &vec,
                           OccurrenceIndicator oi)
  : SeqModelGroup(vec, oi)
{
  // nothing beyond base-class construction
}

// Construct a data tag element token that has a padding template.
// Every entry of `templates` and `paddingTemplate` itself are handed
// over with moveTo(), so the caller's objects are given up.
DataTagElementToken::DataTagElementToken(const ElementType *element,
                                         VectorBase<Text> &templates,
                                         Text &paddingTemplate)
  : ElementToken(element, ContentToken::none),
    templates_(templates.length()),
    havePaddingTemplate_(1)
{
  unsigned slot = 0;
  while (slot < templates.length()) {
    templates[slot].moveTo(templates_[slot]);
    ++slot;
  }
  paddingTemplate.moveTo(paddingTemplate_);
}

// Construct a data tag element token without a padding template.
// Every entry of `templates` is handed over with moveTo(), so the
// caller's objects are given up.
DataTagElementToken::DataTagElementToken(const ElementType *element,
                                         VectorBase<Text> &templates)
  : ElementToken(element, ContentToken::none),
    templates_(templates.length()),
    havePaddingTemplate_(0)
{
  unsigned slot = 0;
  while (slot < templates.length()) {
    templates[slot].moveTo(templates_[slot]);
    ++slot;
  }
}

// Out-of-line destructor; there is nothing to release explicitly.
ContentToken::~ContentToken()
{
  // intentionally empty
}

// Scratch state threaded through the analyze()/analyze1() pass over a
// content model.
struct GroupInfo {
  // Next value handed out as a leaf's leafIndex_; bumped once per leaf.
  unsigned nextLeafIndex;
  // Set to 1 when a PcdataToken is analyzed.
  PackedBoolean containsPcdata;
  // Largest (andIndex + member count) seen over all AND groups so far.
  unsigned andStateSize;
  // Per element-type-index counter handing out a leaf's typeIndex_;
  // slot 0 is also used for leaves that have no element type.
  Vector<unsigned> nextTypeIndex;
  // The argument is the number of slots in nextTypeIndex.
  GroupInfo(size_t);
};


// Start with all counters and flags at zero and nType zeroed slots in
// nextTypeIndex.  Initializers are written in member declaration order,
// which is the order in which they run.
GroupInfo::GroupInfo(size_t nType)
  : nextLeafIndex(0),
    containsPcdata(0),
    andStateSize(0),
    nextTypeIndex(nType)
{
  unsigned *slots = nextTypeIndex.pointer();
  setAll(slots, nextTypeIndex.length(), unsigned(0));
}

// Take over the model group held by modelGroup via extract(); nothing
// else is set up until compile() is called.
CompiledModelGroup::CompiledModelGroup(Owner<ModelGroup> &modelGroup)
  : modelGroup_(modelGroup.extract())
{
  // nothing else to set up
}

void CompiledModelGroup::compile(size_t nElementTypeIndex,
				 GrowableVector<ContentModelAmbiguity> &ambiguities)
{
  FirstSet first;
  LastSet last;
  GroupInfo info(nElementTypeIndex);
  modelGroup_->analyze(info, 0, 0, first, last);
  for (unsigned i = 0; i < last.length(); i++)
    last[i]->setFinal();
  andStateSize_ = info.andStateSize;
  containsPcdata_ = info.containsPcdata;
  initial_ = new InitialPseudoToken;
  LastSet initialSet(1);
  initialSet[0] = initial_.pointer();
  ContentToken::addTransitions(initialSet, first, 1, 0, 0);
  if (modelGroup_->inherentlyOptional())
    initial_->setFinal();
  Vector<unsigned> minAndDepth(info.nextLeafIndex);
  Vector<size_t> elementTransition(nElementTypeIndex);
  initial_->finish(minAndDepth, elementTransition, ambiguities);
  modelGroup_->finish(minAndDepth, elementTransition, ambiguities);
}

// Forward finish() to each member in order, passing through the same
// scratch vectors and ambiguity list.
void ModelGroup::finish(Vector<unsigned> &minAndDepth,
                        Vector<size_t> &elementTransition,
                        GrowableVector<ContentModelAmbiguity> &ambiguities)
{
  unsigned k = 0;
  while (k < nMembers()) {
    member(k).finish(minAndDepth, elementTransition, ambiguities);
    ++k;
  }
}

// Finalize this token's follow_ list: drop repeated follow tokens,
// classify the PCDATA transition, and report ambiguities.
//   minAndDepthVec       - scratch, indexed by follow token index();
//                          reset here on every call
//   elementTransitionVec - scratch, indexed by element type index (slot 0
//                          also serves tokens with no element type);
//                          reset here on every call
//   ambiguities          - grown by one entry per ambiguity found
// Tokens that have andInfo_ are handled by andFinish() instead.
void LeafContentToken::finish(Vector<unsigned> &minAndDepthVec,
			      Vector<size_t> &elementTransitionVec,
			      GrowableVector<ContentModelAmbiguity> &ambiguities)
{
  if (andInfo_) {
    andFinish(minAndDepthVec, elementTransitionVec, ambiguities);
    return;
  }
  size_t *elementTransition = elementTransitionVec.pointer();
  unsigned *minAndDepth = minAndDepthVec.pointer();
  // All-ones in both scratch vectors means "not seen yet".
  setAll(minAndDepth, minAndDepthVec.length(), unsigned(-1));
  setAll(elementTransition, elementTransitionVec.length(), size_t(-1));
  pcdataTransitionType_ = 0;
  simplePcdataTransition_ = 0;
  // follow_ is in decreasing order of andDepth because of how it's
  // constructed.
  size_t n = follow_.length();
  LeafContentToken **follow = follow_.pointer();
  // j is the write position of the in-place compaction; i is the read
  // position.
  size_t j = 0;
  for (size_t i = 0; i < n; i++) {
    unsigned &minDepth = minAndDepth[follow[i]->index()];
    // Non-zero means this follow token has not been kept yet; marking it
    // 0 makes later repeats of the same token get skipped.
    if (minDepth) {
      minDepth = 0;
      if (j != i)
	follow[j] = follow[i];
      // Keep requiredIndex_ pointing at the same entry after compaction.
      if (i == requiredIndex_)
	requiredIndex_ = j;
      const ElementType *e = follow[i]->elementType();
      unsigned ei;
      if (e == 0) {
	// No element type: record how the PCDATA transition looks.
	// Type 1 = target has no andInfo_ (and is remembered in
	// simplePcdataTransition_), type 2 = target has andInfo_.
	if (follow[i]->andInfo_ == 0) {
	  simplePcdataTransition_ = follow[i];
	  pcdataTransitionType_ = 1;
	}
	else
	  pcdataTransitionType_ = 2;
	ei = 0;
      }
      else
	ei = e->index();
      // A transition for this element type slot was already kept.
      if (elementTransition[ei] != size_t(-1)) {
	const LeafContentToken *prev = follow[elementTransition[ei]];
	// This might not be true: consider (a & b?)*; after the
	// a there are two different ways to get to the same b,
	// with the same and depth.
	if (follow[i] != prev) {
	  ContentModelAmbiguity &a(ambiguities.grow());
	  a.from = this;
	  a.to1 = prev;
	  a.to2 = follow[i];
	  a.andDepth = 0;
	}
      }
      // Remember the (compacted) position of the latest transition for
      // this slot.
      elementTransition[ei] = j;
      j++;
    }
  }
  // Drop the tail left over after compaction.
  follow_.setLength(j);
}

// Variant of finish() for tokens that have andInfo_.  follow_ and
// andInfo_->follow are compacted in parallel; a transition is kept only
// if its and-depth is lower than any already kept to the same token.
//   minAndDepthVec       - scratch, indexed by follow token index();
//                          reset here on every call
//   elementTransitionVec - scratch, indexed by element type index (slot 0
//                          also serves tokens with no element type);
//                          reset here on every call
//   ambiguities          - grown by one entry per ambiguity found
void LeafContentToken::andFinish(Vector<unsigned> &minAndDepthVec,
				 Vector<size_t> &elementTransitionVec,
				 GrowableVector<ContentModelAmbiguity> &ambiguities)
{
  size_t *elementTransition = elementTransitionVec.pointer();
  unsigned *minAndDepth = minAndDepthVec.pointer();
  // All-ones in both scratch vectors means "not seen yet".
  setAll(minAndDepth, minAndDepthVec.length(), unsigned(-1));
  setAll(elementTransition, elementTransitionVec.length(), size_t(-1));
  pcdataTransitionType_ = 0;
  simplePcdataTransition_ = 0;
  // follow_ is in decreasing order of andDepth because of how it's
  // constructed.
  size_t n = follow_.length();
  // j is the write position of the in-place compaction; i is the read
  // position.
  size_t j = 0;
  Transition *andFollow = andInfo_->follow.pointer();
  for (size_t i = 0; i < n; i++) {
    unsigned &minDepth = minAndDepth[follow_[i]->index()];
    // ignore transitions to the same token with the same and depth.
    if (andFollow[i].andDepth < minDepth) {
      minDepth = andFollow[i].andDepth;
      if (j != i) {
	follow_[j] = follow_[i];
	andFollow[j] = andFollow[i];
      }
      // Keep requiredIndex_ pointing at the same entry after compaction.
      if (i == requiredIndex_)
	requiredIndex_ = j;
      const ElementType *e = follow_[i]->elementType();
      unsigned ei;
      if (e == 0) {
	// No element type: PCDATA transition, always type 2 here.
	pcdataTransitionType_ = 2;
	ei = 0;
      }
      else
	ei = e->index();
      // If we have transitions t1, t2, ... tN to tokens having
      // the same element type, with
      // and-depths d1, d2, ... dN, where d1 >= d2 >= ... >= dN,
      // then there is an ambiguity unless
      // d1 > d2 > ... > dN and t1, t2, ... , tN-1 are all isolated.
      size_t previ = elementTransition[ei];
      if (previ != size_t(-1)) {
	const LeafContentToken *prev = follow_[previ];
	// This might not be true: consider (a & b?)*; after the
	// a there are two different ways to get to the same b,
	// with the same and depth.
	if (follow_[i] != prev
	    && (andFollow[previ].andDepth == andFollow[i].andDepth
		|| !andFollow[previ].isolated)) {
	  ContentModelAmbiguity &a(ambiguities.grow());
	  a.from = this;
	  a.to1 = prev;
	  a.to2 = follow_[i];
	  a.andDepth = andFollow[i].andDepth;
	}
	// Only advance the remembered transition for this slot when the
	// previous one was isolated.
	if (andFollow[previ].isolated)
	  elementTransition[ei] = j;
      }
      else
	elementTransition[ei] = j;
      j++;
    }
  }
  // Drop the tails left over after compaction; both lists stay parallel.
  follow_.setLength(j);
  andInfo_->follow.setLength(j);
}

// Analyze this token: run the token-specific analyze1(), then apply the
// occurrence indicator.
//   - an `opt` indicator makes the token inherently optional;
//   - an inherently optional token's first set is marked not required;
//   - a `plus` indicator adds transitions from the last set back to the
//     first set.
void ContentToken::analyze(GroupInfo &info,
                           const AndModelGroup *andAncestor,
                           unsigned andGroupIndex,
                           FirstSet &first,
                           LastSet &last)
{
  analyze1(info, andAncestor, andGroupIndex, first, last);

  const Boolean hasOpt = (occurrenceIndicator_ & opt) != 0;
  if (hasOpt)
    inherentlyOptional_ = 1;

  if (inherentlyOptional_)
    first.setNotRequired();

  const Boolean hasPlus = (occurrenceIndicator_ & plus) != 0;
  if (hasPlus) {
    addTransitions(last, first, 0,
                   andIndex(andAncestor), andDepth(andAncestor));
  }
}

// Analyze a leaf: assign its leaf and type indexes from the counters in
// info, attach AndInfo when it sits inside an AND group, and make both
// the first and the last set consist of just this token.
void LeafContentToken::analyze1(GroupInfo &info,
                                const AndModelGroup *andAncestor,
                                unsigned andGroupIndex,
                                FirstSet &first,
                                LastSet &last)
{
  leafIndex_ = info.nextLeafIndex;
  info.nextLeafIndex += 1;

  // Leaves without an element type share counter slot 0.
  unsigned &typeCounter = info.nextTypeIndex[element_ ? element_->index() : 0];
  typeIndex_ = typeCounter;
  typeCounter += 1;

  if (andAncestor) {
    andInfo_ = new AndInfo;
    andInfo_->andAncestor = andAncestor;
    andInfo_->andGroupIndex = andGroupIndex;
  }

  first.init(this);
  last.setLength(1);
  last[0] = this;
  inherentlyOptional_ = 0;
}

// Same as the generic leaf analysis, but first records in info that the
// model contains PCDATA.
void PcdataToken::analyze1(GroupInfo &info,
                           const AndModelGroup *andAncestor,
                           unsigned andGroupIndex,
                           FirstSet &first,
                           LastSet &last)
{
  info.containsPcdata = 1;
  this->LeafContentToken::analyze1(info, andAncestor, andGroupIndex,
                                   first, last);
}

// Analyze an OR group.  The group's first/last sets are the
// concatenation of its members' first/last sets, no first token is
// required, and the group is inherently optional if any member is.
void OrModelGroup::analyze1(GroupInfo &info,
                            const AndModelGroup *andAncestor,
                            unsigned andGroupIndex,
                            FirstSet &first,
                            LastSet &last)
{
  // Member 0 is analyzed straight into the output sets.
  member(0).analyze(info, andAncestor, andGroupIndex, first, last);
  first.setNotRequired();
  inherentlyOptional_ = member(0).inherentlyOptional();

  unsigned k = 1;
  while (k < nMembers()) {
    FirstSet memberFirst;
    LastSet memberLast;
    member(k).analyze(info, andAncestor, andGroupIndex,
                      memberFirst, memberLast);
    first.append(memberFirst);
    first.setNotRequired();
    last.append(memberLast);
    inherentlyOptional_ |= member(k).inherentlyOptional();
    ++k;
  }
}

// Analyze a sequence group.  Each member is linked to the next by adding
// transitions from the running last set to that member's first set.  The
// group is inherently optional only if every member is.
void SeqModelGroup::analyze1(GroupInfo &info,
                             const AndModelGroup *andAncestor,
                             unsigned andGroupIndex,
                             FirstSet &first,
                             LastSet &last)
{
  // Member 0 is analyzed straight into the output sets.
  member(0).analyze(info, andAncestor, andGroupIndex, first, last);
  inherentlyOptional_ = member(0).inherentlyOptional();

  unsigned k = 1;
  while (k < nMembers()) {
    FirstSet memberFirst;
    LastSet memberLast;
    member(k).analyze(info, andAncestor, andGroupIndex,
                      memberFirst, memberLast);
    addTransitions(last, memberFirst, 1,
                   andIndex(andAncestor), andDepth(andAncestor));

    // While everything so far may be skipped, this member's first tokens
    // can also start the group.
    if (inherentlyOptional_)
      first.append(memberFirst);

    // A skippable member extends the last set; otherwise it replaces it.
    if (!member(k).inherentlyOptional())
      memberLast.moveTo(last);
    else
      last.append(memberLast);

    inherentlyOptional_ &= member(k).inherentlyOptional();
    ++k;
  }
}

// Analyze an AND group.
//   - Records this group's and-depth, and-index, ancestor and position
//     within the ancestor.
//   - Raises info.andStateSize so it covers andIndex_ + nMembers().
//   - Analyzes every member with this group as its AND ancestor; the
//     group's first/last sets are the concatenation of the members'
//     sets, and no first token is required.
//   - The group is inherently optional only if every member is.
//   - Finally adds transitions from each member's last set to every
//     other member's first set.
void AndModelGroup::analyze1(GroupInfo &info,
                             const AndModelGroup *andAncestor,
                             unsigned andGroupIndex,
                             FirstSet &first,
                             LastSet &last)
{
  andDepth_ = ContentToken::andDepth(andAncestor);
  andIndex_ = ContentToken::andIndex(andAncestor);
  andAncestor_ = andAncestor;
  andGroupIndex_ = andGroupIndex;
  if (andIndex_ + nMembers() > info.andStateSize)
    info.andStateSize = andIndex_ + nMembers();
  Vector<FirstSet> firstVec(nMembers());
  Vector<LastSet> lastVec(nMembers());
  member(0).analyze(info, this, 0, firstVec[0], lastVec[0]);
  first = firstVec[0];
  first.setNotRequired();
  last = lastVec[0];
  inherentlyOptional_ = member(0).inherentlyOptional();
  // Declared once, outside both loops: under the ISO C++ rule a variable
  // declared in a for-init-statement is out of scope after that loop, so
  // the second loop cannot reuse it.  A single declaration here is valid
  // under both the ISO rule and the older for-scope rule.
  unsigned i;
  for (i = 1; i < nMembers(); i++) {
    member(i).analyze(info, this, i, firstVec[i], lastVec[i]);
    first.append(firstVec[i]);
    first.setNotRequired();
    last.append(lastVec[i]);
    inherentlyOptional_ &= member(i).inherentlyOptional();
  }
  // After finishing member i, any other member j may start.  The
  // transition requires and-state slot (andIndex() + j) to be clear and
  // sets slot (andIndex() + i); it is isolated when member j is not
  // inherently optional.
  for (i = 0; i < nMembers(); i++) {
    for (unsigned j = 0; j < nMembers(); j++)
      if (j != i)
        addTransitions(lastVec[i], firstVec[j], 0,
                       andIndex() + nMembers(),
                       andDepth() + 1,
                       !member(j).inherentlyOptional(),
                       andIndex() + j, andIndex() + i);
  }
}

// Add the same set of transitions (to every token in `to`) to each
// token in `from`; all remaining arguments are passed through unchanged
// to LeafContentToken::addTransitions().
void ContentToken::addTransitions(const LastSet &from,
                                  const FirstSet &to,
                                  Boolean maybeRequired,
                                  unsigned andClearIndex,
                                  unsigned andDepth,
                                  Boolean isolated,
                                  unsigned requireClear,
                                  unsigned toSet)
{
  const size_t sourceCount = from.length();
  unsigned k = 0;
  while (k < sourceCount) {
    from[k]->addTransitions(to, maybeRequired, andClearIndex, andDepth,
                            isolated, requireClear, toSet);
    ++k;
  }
}

void LeafContentToken::addTransitions(const FirstSet &to,
				      Boolean maybeRequired,
				      unsigned andClearIndex,
				      unsigned andDepth,
				      Boolean isolated,
				      unsigned requireClear,
				      unsigned toSet)
{
  if (maybeRequired && to.requiredIndex() != size_t(-1)) {
    ASSERT(requiredIndex_ == size_t(-1));
    requiredIndex_ = to.requiredIndex() + follow_.length();
  }
  size_t length = follow_.length();
  size_t n = to.length();
  follow_.setLength(length + n);
  for (size_t i = 0; i < n; i++)
    follow_[length + i] = to.token(i);
  if (andInfo_) {
    andInfo_->follow.setLength(length + n);
    for (size_t i = 0; i < n; i++) {
      Transition &t(andInfo_->follow[length + i]);
      t.clearAndStateStartIndex = andClearIndex;
      t.andDepth = andDepth;
      t.isolated = isolated;
      t.requireClear = requireClear;
      t.toSet = toSet;
    }
  }
}

// Create an and-state with n flags, all cleared, and clearFrom_ at 0.
AndState::AndState(unsigned n)
  : v_(n),
    clearFrom_(0)
{
  PackedBoolean *flags = v_.pointer();
  setAll(flags, v_.length(), PackedBoolean(0));
}

// Clear flags downward from clearFrom_ - 1 until clearFrom_ reaches i.
// Does nothing when clearFrom_ is already <= i.
void AndState::clearFrom1(unsigned i)
{
  for (; clearFrom_ > i; ) {
    --clearFrom_;
    v_[clearFrom_] = 0;
  }
}

// Default state: no position, an empty and-state, and a minimum
// and-depth of 0.  pos_ and minAndDepth_ are initialized explicitly so
// that a default-constructed state never holds indeterminate values;
// the resulting state is the same one the model-taking constructor
// produces when given a null model.
MatchState::MatchState()
: pos_(0),
  andState_(0),
  minAndDepth_(0)
{
}

// Start matching at the model's initial token with an and-state sized
// for that model.  A null model gives a null position and an empty
// and-state.  The minimum and-depth always starts at 0.
MatchState::MatchState(const CompiledModelGroup *model)
  : pos_(model ? model->initial() : 0),
    andState_(model ? model->andStateSize() : 0),
    minAndDepth_(0)
{
  // nothing else to set up
}

// Look up the token that element e would transition to from the current
// position.  Return it only if it is neither inherently optional nor a
// member of an OR group; otherwise (or if there is no such token)
// return null.
const LeafContentToken *MatchState::invalidExclusion(const ElementType *e)
     const
{
  const LeafContentToken *target =
    pos_->transitionToken(e, andState_, minAndDepth_);
  if (!target)
    return 0;
  if (target->inherentlyOptional())
    return 0;
  if (target->orGroupMember())
    return 0;
  return target;
}

// Find, without changing any state, the first follow token whose
// element type is `to` and whose transition is currently allowed.
// Without andInfo_ every transition is allowed.  With andInfo_ a
// transition is allowed when its requireClear slot is either
// invalidIndex or clear in andState, and its and-depth is at least
// minAndDepth.  Returns null when nothing matches.
const LeafContentToken *
LeafContentToken::transitionToken(const ElementType *to,
                                  const AndState &andState,
                                  unsigned minAndDepth) const
{
  LeafContentToken *const *targets = follow_.pointer();
  const size_t count = follow_.length();

  if (!andInfo_) {
    for (size_t k = 0; k < count; ++k) {
      if (targets[k]->elementType() == to)
        return targets[k];
    }
    return 0;
  }

  const Transition *guards = andInfo_->follow.pointer();
  for (size_t k = 0; k < count; ++k) {
    if (targets[k]->elementType() != to)
      continue;
    const Transition &guard = guards[k];
    if (guard.requireClear != Transition::invalidIndex
        && !andState.isClear(guard.requireClear))
      continue;
    if (guard.andDepth < minAndDepth)
      continue;
    return targets[k];
  }
  return 0;
}

// Try to take a transition on element type `to`.
// The first allowed follow token with that element type is chosen (same
// rule as transitionToken()).  On success newpos is set to it,
// minAndDepth is recomputed for the new position, and -- for tokens with
// andInfo_ -- andState is updated (toSet slot set unless invalidIndex,
// then cleared from clearAndStateStartIndex).  Returns 1 on success and
// 0, with nothing modified, when no transition applies.
Boolean
LeafContentToken::tryTransition(const ElementType *to,
                                AndState &andState,
                                unsigned &minAndDepth,
                                const LeafContentToken *&newpos) const
{
  LeafContentToken *const *targets = follow_.pointer();
  const size_t count = follow_.length();

  if (!andInfo_) {
    for (size_t k = 0; k < count; ++k) {
      if (targets[k]->elementType() != to)
        continue;
      newpos = targets[k];
      minAndDepth = newpos->computeMinAndDepth(andState);
      return 1;
    }
    return 0;
  }

  const Transition *guards = andInfo_->follow.pointer();
  for (size_t k = 0; k < count; ++k) {
    if (targets[k]->elementType() != to)
      continue;
    const Transition &guard = guards[k];
    if (guard.requireClear != Transition::invalidIndex
        && !andState.isClear(guard.requireClear))
      continue;
    if (guard.andDepth < minAndDepth)
      continue;
    if (guard.toSet != Transition::invalidIndex)
      andState.set(guard.toSet);
    andState.clearFrom(guard.clearAndStateStartIndex);
    newpos = targets[k];
    minAndDepth = newpos->computeMinAndDepth(andState);
    return 1;
  }
  return 0;
}


unsigned LeafContentToken::computeMinAndDepth1(const AndState &andState) const
{
  ASSERT(andInfo_ != 0);
  unsigned groupIndex = andInfo_->andGroupIndex;
  for (const AndModelGroup *group = andInfo_->andAncestor;
       group;
       groupIndex = group->andGroupIndex(), group = group->andAncestor())
    for (unsigned i = 0; i < group->nMembers(); i++)
      if (i != groupIndex && !group->member(i).inherentlyOptional()
	  && andState.isClear(group->andIndex() + i))
	return group->andDepth() + 1;
  return 0;
}

// Return the follow token of the required transition, if there is one
// and it is currently allowed; otherwise null.  Without andInfo_ the
// required transition is always allowed.  With andInfo_ it is allowed
// when its requireClear slot is invalidIndex or clear in andState, and
// its and-depth is at least minAndDepth.
const LeafContentToken *
LeafContentToken::impliedStartTag(const AndState &andState,
                                  unsigned minAndDepth) const
{
  if (requiredIndex_ == size_t(-1))
    return 0;
  if (!andInfo_)
    return follow_[requiredIndex_];

  const Transition &guard(andInfo_->follow[requiredIndex_]);
  if (guard.requireClear != Transition::invalidIndex
      && !andState.isClear(guard.requireClear))
    return 0;
  if (guard.andDepth < minAndDepth)
    return 0;
  return follow_[requiredIndex_];
}

// Take the required transition unconditionally; the caller must ensure
// one exists (asserted).  For tokens with andInfo_, andState is updated
// as in tryTransition().  newpos receives the target token and
// minAndDepth is recomputed for it.
void LeafContentToken::doRequiredTransition(AndState &andState,
                                            unsigned &minAndDepth,
                                            const LeafContentToken *&newpos)
     const
{
  ASSERT(requiredIndex_ != size_t(-1));
  if (andInfo_) {
    const Transition &guard(andInfo_->follow[requiredIndex_]);
    if (guard.toSet != Transition::invalidIndex)
      andState.set(guard.toSet);
    andState.clearFrom(guard.clearAndStateStartIndex);
  }
  const LeafContentToken *target = follow_[requiredIndex_];
  newpos = target;
  minAndDepth = newpos->computeMinAndDepth(andState);
}

// An empty first set has no required token (size_t(-1) marks "none").
FirstSet::FirstSet()
  : requiredIndex_(size_t(-1))
{
  // nothing else to set up
}

// Make the set contain exactly the token p, and mark it as the required
// token (index 0).
void FirstSet::init(LeafContentToken *p)
{
  // Growing to 256 and then shrinking to 1 is the original's way of
  // preallocating some space before the set is appended to.
  v_.setLength(256);
  v_.setLength(1);
  v_[0] = p;
  this->requiredIndex_ = 0;
}

void FirstSet::append(const FirstSet &set)
{
  if (set.requiredIndex_ != size_t(-1)) {
    ASSERT(requiredIndex_ == size_t(-1));
    requiredIndex_ = set.requiredIndex_ + v_.length();
  }
  v_.append(set.v_);
}
