#!/usr/bin/perl -w

#############################################################
# Copyright 1998 VMware, Inc.  All rights reserved. -- VMware Confidential
#############################################################

#
# DOMAccess.pm
# 
# A class that provides quick access to DOM nodes.
#

# Notes:
#
# 7/10/02:
#    toEncodedString() has been deprecated in favor of the method
#    toXmlString().
#
#    newFile() should only be used when an ASCII character sets is
#    being used. -jhu
#

# XXX Disposal of data is not quite correct.  The abstraction calls for
# calling dispose() every time a DOMAccess tree is created via a new
# constructor or a getSubTree(), even after the object was added via
# addSubTree().  However, the abstraction is broken because if you
# free a tree created via new, any subtrees created from it will be
# left with a dangling reference.
#
# In practice, this is rarely done since you generally work on a subtree
# in the context of the main tree, so the subtree usually gets freed
# before the main tree.  Also, we delay the disposal of DOMAccess nodes
# for performance reasons, and this has the side effect of making dangling
# references far less likely to occur.
#
# There are many ways to fix this such as keeping extra references between
# trees and subtrees and using usage counts, but it's probably not worth the
# effort as the XML way of doing things is deprecated in favor of VMDB.
# -jhu

package VMware::DOMAccess;
use strict;
use XML::DOM;
use Carp qw( cluck croak );

# To turn on warning reporting.
my $DOMDOC_WARNING = 0;

# To bomb out if a warning occurs.
my $DOMDOC_WARNING_FATAL = 0;

# Maximum number of elements that should be created at a time in any
# set operation.
my $MAX_CREATE_ELEMENT = 20;       

# Maximum number of documents to keep in the dispose list before actually
# disposing of the objects.  Make serverd's list larger to get some
# responsiveness improvements since serverd tends to use smaller objects.
my $MAX_DISPOSE_LIST = 10;
if ($ENV{vmware_VMSERVERD}) {
   $MAX_DISPOSE_LIST = 200;
}

# Used by newFile/newXML to load XML
my $parser = new XML::DOM::Parser;

# Queue up object to be disposed, periodically get rid of them
my @disposeList = ();

my $gLeakDumpPeriod = 120; # seconds
my $gIterCount = 0;
my $gTrackLeaks = 0; 
my %gAllocatedObjs;


#####################################################################
# Public Static Methods (indep. of each object)                     #
#####################################################################

sub new($) {
    my ($class, $rootTag) = @_;

    my $doc = new XML::DOM::Document;
    my $node = $doc->createElement($rootTag);
    $node->setOwnerDocument($doc);
    $doc->appendChild($node);
    return VMware::DOMAccess->newElement($doc);
}

sub newElement($) {
    my ($class) = shift;
    my ($element) = @_;

    my $self = {};
    $self->{DOC} = $element;

    my ($p, $f, $l) = caller(1);
    allocateObj($self, $f . "::" . $l);

    bless($self, $class);
    return($self);
}

# XXX Warning, do not use this function as it does not properly handle
# arbitrary characters.
sub newFile($) {
    my ($class, @args) = @_;
    my $file = "";

    if ( $#args > -1 ) {
        $file = $args[0];
    }

    my $self = {};

    $self->{DOC} = $parser->parsefile($file);
    
    my ($p, $f, $l) = caller(1);
    allocateObj($self, $f . "::" . $l);

    bless($self, $class);
    return $self;
}

sub newXML($) {
    my ($class, @args) = @_;
    my ($strDoc) = @args;

    my $self = {};

    $strDoc = encodeAsXml($strDoc);
    $self->{DOC} = $parser->parsestring($strDoc);
    
    my ($p, $f, $l) = caller(1);
    allocateObj($self, $f . "::" . $l);

    bless($self, $class);
    return $self;
}

sub setVerbose($) {
    $DOMDOC_WARNING = shift;
}

sub setFatalWarning($) {
    $DOMDOC_WARNING_FATAL = shift;
}


#####################################################################
# Public Methods                                                    #
#####################################################################

# Accessor functions

# Checks to see if an element exists.
sub hasElement($) {
    my($self) = shift;
    my($key, $create) = @_;

    my $root = $self->getRootElement();

    if( !defined($root) ) {
	# Error
	p_warning("DOMAccess.hasElement: Undefined root document element.");
	return(undef);
    }

    my @identifiers = parseIdentifier($key);
    my @keys = map($_->{BASE}, @identifiers);
    my @indices = map($_->{INDEX}, @identifiers);

    # Make sure the first identifier is the same as the document root.
    if( $#keys > -1 && 
        $root->getTagName() ne $keys[0] && 
        $keys[0] ne "" && 
        $indices[0] == 0 ) {
	# The first identifiers do not match
	return(0);
    }

    # Set the element pointer e to the root element.
    my $e = $root;

    my $i;
    for( $i = 1; $i <= $#keys; $i++ ) {
	if( $indices[$i] < 0 ) {
	    return(0);
	}
	
	my @elements = $e->getElementsByTagName( $keys[$i], 0 );
	
	if( $#elements < $indices[$i] ) {
	    # not found
	    return(0);
	}

	# Success, take the one references in indices.
	$e = $elements[$indices[$i]];
    }
    
    return(1);
}

# Gets the top-level DOM document object
sub getDocument() {
    my($self) = shift;
    if( $self->isSubTree() ) {
	return(undef);
    } else {
	return($self->{DOC});
    }
}

# Internal function.  Not to be used.
sub getElement() {
    my($self) = shift;
    if( $self->isSubTree() ) {
	return($self->{DOC});
    } else {
	return(undef);
    }
}

# Gets the root element of this DOM tree.  If the tree is a subtree,
# it gets the element where the subtree begins.
sub getRootElement() {
    my($self) = shift;

    my $doc;
    my $root;

    if( !defined($self->{ROOT_ELEMENT}) ) {
        if( $self->isSubTree() ) {
       	    $root = $self->getElement();
        } else {
            $doc = $self->getDocument();
            $root = $doc->getDocumentElement();
        }
        $self->{ROOT_ELEMENT} = $root;
    }

    return $self->{ROOT_ELEMENT};
}

# Gets a DOM element.
# Returns undef if the element does not exist.
sub get($) {
    my($self) = shift;
    my($key) = @_;

    return $self->getInternal($key, 0);
}

# An internal get function not intended for the user
# Gets a node given a name.  If create is true, will create a node if it does not
# exist.
sub getInternal($$) {
    my($self) = shift;
    my($key, $create) = @_;

    # Optimization: cache last lookup and lookup for '.'
    if( defined($self->{LAST_KEY}) && ($key eq $self->{LAST_KEY}) ) {
        return $self->{LAST_ELEMENT};
    } elsif( ($key eq '.') && defined($self->{DOT_ELEMENT}) ) {
        return $self->{DOT_ELEMENT};
    }

    my $root = $self->getRootElement();

    if( !defined($root) ) {
	# Error
	p_warning("DOMAccess.getInternal: Undefined root document element.");
	return(undef);
    }

    my @identifiers = parseIdentifier($key);
    my @keys = map($_->{BASE}, @identifiers);
    my @indices = map($_->{INDEX}, @identifiers);

    # Make sure the first identifier is the same as the document root.
    if( $#keys > -1 && $root->getTagName() ne $keys[0] && $keys[0] ne "" && $indices[0] == 0 ) {
	# Error
	p_warning("DOMAccess.getInternal: Invalid first identifier.  Key is '$key'.  Aborted.\n");
	return(undef);
    }

    # Set the element pointer e to the root element.
    my $e = $root;

    my $i;
    for( $i = 1; $i <= $#keys; $i++ ) {
	if( $indices[$i] < 0 ) {
	    p_warning("DOMAccess.getInternal: Invalid index '" . $indices[$i] . "' received for key '$key'.\n");
	    return(undef);
	}
	
	my @elements = $e->getElementsByTagName( $keys[$i], 0 );
	
	if( $#elements < $indices[$i] ) {
	    # not found, create if if the create flag is true
	    if( $create ) {
		my $doc = $e->getOwnerDocument();
		
		if( $DOMDOC_WARNING && $indices[$i] - $#elements > $MAX_CREATE_ELEMENT ) {
		    p_warning("Warning: Creating too many elements.  Possible invalid index being used.\n");
		}
		
		for( my $j = $#elements; $j < $indices[$i]; $j++ ) {
		    my $newelem = $doc->createElement( $keys[$i] );
		    $e->appendChild( $newelem );
		}
		@elements = $e->getElementsByTagName( $keys[$i], 0 );
	    } else {
		# Don't print this warning anymore since it's annoying.
		#p_warning("DOMAccess.getInternal: Invalid identifier encountered.  Key is '$key'. Aborted.\n");
		return(undef);
	    }
	}

	# Success, take the one references in indices.
	$e = $elements[$indices[$i]];
    }
    
    # Optimization: cache last lookup and lookup for '.'
    $self->{LAST_KEY} = $key;
    $self->{LAST_ELEMENT} = $e;
    if( $key eq '.' ) {
        $self->{DOT_ELEMENT} = $e;
    }

    return($e);
}

sub getAttribute($$) {
    my($self) = shift;
    my($key, $attr) = @_;

    if( $key eq "" ) {
	# It wouldn't be a DOM object if the root node didn't have any elements
	return("");
    }

    my $e = $self->get($key);

    if( defined($e) ) {
	my $result = $e->getAttribute($attr);
	return(decodeInternal($result));
    } else {
	return("");
    }
}

sub getRootName() {
    my($self) = shift;
    my $root = $self->getRootElement();
    my $name = $root->getTagName();
    return($name);
}

# Gets a DOMAccess object representing the subtree
sub getSubTree($) {
    my($self) = shift;
    my($key) = @_;

    my $str = "";
    my $e = $self->get($key);

    if( defined($e) ) {
	# Special case: going for the root element.
	if( $key eq "" ) {
	    return($self);
	}
	return( __PACKAGE__->newElement($e) );
    }
    
    return(undef);
}


# Gets the string value of an element.  If the element has a
# sub-element, it will get that in string format too.  This method
# will automatically decode the string.  See the encoding section for
# more information about the encoding scheme.
#
# Returns a null string if the value does not exist.
sub getValue($) {
    my($self) = shift;
    my($key) = @_;
    return $self->getValueInternal($key, \&decodeInternal);
}


# Gets the string value of an element.  If the element has a
# sub-element, it will get that in string format too.  This method
# preserves the encoding of the string so that it may be transmitted
# and reparsed at the receiving end.  See the encoding section for
# more information about the encoding scheme.
#
# To get the entire DOMAccess tree/subtree use a null string as
# the argument to this function.
#
# Returns a null string if the value does not exist.
sub toXmlString($) {
    my($self) = shift;
    my($key) = @_;
    return $self->getValueInternal($key, \&decodeAsXml);
}


# Internal getValue
sub getValueInternal($$) {
   my($self) = shift;
   my($key, $decodeFunc) = @_;
   
   my $str = "";
   
   my $e = $self->get($key);
   
   if (defined($e)) {
      # Special case: going for the root element.
      if ($key eq "") {
         $str = $e->toString();
         $str = &{$decodeFunc}($str);
         return($str);
      }
      
      my @children = $e->getChildNodes();
      
      my $c;
      foreach $c (@children) {
         my $s = $c->toString();
         $s = &{$decodeFunc}($s);
         $str .= $s;
      }
   }
   
   return($str);
}

# Deprecated in favor of toXmlString()
sub toEncodedString($) {
    my($self) = shift;
    my($key) = @_;
    return $self->toXmlString($key);
}

# Checks if there are any more element nodes below an element.
sub hasChildElement($) {
    my($self) = shift;
    my($key) = @_;

    if( $key eq "" ) {
	# It wouldn't be a DOM object if the root node didn't have any elements
	return(1);
    }
    
    my $e = $self->get($key);
    
    if( defined($e) ) {
	my @nodes = $e->getChildNodes();
	for my $node (@nodes) {
	    if($node->getNodeType() == XML::DOM::ELEMENT_NODE) {
		return(1);
	    }
	}
    }
    
    return(0);
}

# Returns an array of element names
# Takes in the name of an element to look in.
# Also applies a filter when listing so you can find only the 
# ones that you want.
sub listElementNames() {
    my($self) = shift;
    my($key, $filter) = @_;
    my @names;

    my %nameCount;

    if( !defined($key) ) {
	p_warning("DOMAccess.listElementNames: Did not get a key on call to listElementNames\n");
	return(@names);
    }

    if( $key eq "" ) {
	my $name = $self->getRootName() . "[0]";
	push(@names, $name);
	return(@names);
    }

    my $e = $self->get($key);

    if( defined($e) ) {
	my @nodes = $e->getChildNodes();
	for my $node (@nodes) {
	    if($node->getNodeType() == XML::DOM::ELEMENT_NODE) {
		# Add the node name if it is unique.
		my $name = $node->getTagName();

		if( defined($nameCount{$name}) ) {
		    $nameCount{$name} = $nameCount{$name} + 1;
		    $name .= "[" . ($nameCount{$name} - 1) . "]";
		} else {
		    $nameCount{$name} = 1;
		    $name .= "[0]";
		}

		# If there is a filter and the node doesn't match the
		# filter, don't report it.
		if( defined($filter) && $name !~ /$filter/ ) {
		    next;
		}

		push(@names, $name);
	    }
	}
    }

    return(@names);
}


# Returns an array of element names
sub listAttributeNames($) {
    my($self) = shift;
    my($key) = @_;
    my @names;

    if( $key eq "" ) {
	my $name = $self->getRootName();
	push(@names, $name);
	return(@names);
    }

    my $e = $self->get($key);

    if( defined($e) ) {
	my $namedMap = $e->getAttributes();

	my $i;
	for( $i = 0; $i < $namedMap->getLength(); $i++ ) {
	    my $name = $namedMap->item($i)->getName();
	    push(@names, $name);
	}
    }

    return(@names);
}



# Mutator functions
sub setValue($$) {
    my($self) = shift;
    my($key, $value, $encoded) = @_;

    if( !defined($value) ) {
	p_warning("DOMAccess.setValue: Got an undefined value.  Using null string.\n");
	$value = "";
    }

    if( !defined($key) ) {
	p_warning("DOMAccess.setValue: Got an undefined key.  Aborting.\n");
	return;
    }

    if( $key eq "" ) {
	# Warning, cannot change the root element
	p_warning("DOMAccess.setValue: Cannot change root element.  Set aborted.\n");
	return;
    }

    # Get the element and create it if necessary
    my $e = $self->getInternal($key, 1);

    if( !defined($e) ) {
	p_warning("DOMAccess.setValue: Could not set because get did not return true.  Aborted.\n");
	return;
    }

    # Value is a string.
    if( ref($value) ) {
	p_warning("DOMAccess.setValue: Could not set because value is not scalar.  Got type '" . ref($value) . "'\n");
	return;
    }

    # Remove all other nodes except attributes if this is a string
    my $node;
    foreach $node ($e->getChildNodes()) {
	if( $node->getNodeType() != XML::DOM::ATTRIBUTE_NODE ) {
	    $e->removeChild($node);
	    $node->dispose();
	}
    }
    
    # Create a text node.
    if( length($value) > 0 ) {
	# Add the new text node
	my $doc = $e->getOwnerDocument();
        # XXX This does not work in the case of non-printable characters -jhu
	$e->appendChild($doc->createTextNode($encoded ? $value : encodeInternal($value)));
    }
}


sub addSubTree($$) {
    my($self) = shift;
    my($key, $value) = @_;

    # Get the element and create it if necessary
    my $e = $self->getInternal($key, 1);

    if( !defined($e) ) {
	p_warning("DOMAccess.addSubTree: Could not set because get did not return true.  Aborted.\n");
	return;
    }

    if( ref($value) eq __PACKAGE__ ) {
	# Value is another DOMAccess node.
#	if( $value->isSubTree() ) {
#	    # Cannot accept sub trees.  Can only accept whole trees.
#	    p_warning("DOMAccess.addSubTree: Could not add because value is a subtree.  Cannot use a subtree as a value.\n");
#	    return;
#	}
	
	# Grab the node and set the owner to the new document to be inserted.
        my $node = $value->getRootElement();
        my $parent = $node->getParentNode();
        my $newElem = $parent->removeChild($node);

	$newElem->setOwnerDocument($e->getOwnerDocument());
	$e->appendChild($newElem);
	return;
    }

    p_warning("DOMAccess.addSubTree: Could not add subtree because value is not a DOMAccess object.\n");
}

# Sets an attribute for an element, creating if necessary.
sub setAttribute($$) {
    my($self) = shift;
    my($key, $attr, $value, $encoded) = @_;

    if( $key eq "" ) {
	# Warning, cannot change the root element
	p_warning("DOMAccess.setAttribute: Cannot change attribute of null element.  Set aborted.\n");
	return;
    }

    # Get the element and create it if necessary
    my $e = $self->getInternal($key, 1);

    if( !defined($e) ) {
	# Warning, cannot change an attribute for an element that does not exist
	p_warning("DOMAccess.setAttribute: Element '$key' does not exist.  Cannot set attribute.\n");
	return;
    }
	
    my $v = $encoded ? $value : encodeInternal($value);
    $e->setAttribute($attr, $v);
}

sub removeElement($) {
    my($self) = shift;
    my($key) = @_;

    if( $key eq "" || $key eq $self->getRootName() ) {
	# Warning, cannot change the root element
	p_warning("DOMAccess.removeElement: Cannot change root element.  Set aborted.\n");
	return;
    }

    # Get the element and create it if necessary
    my $oldElem = $self->getInternal($key, 0);

    if( !defined($oldElem) ) {
	# Warning: no need to remove since it doesn't exist!
	p_warning("DOMAccess.removeElement: Not removing element '$key' because does not exist!\n");
	return;
    }

    my @keys = parseIdentifier($key);
    pop(@keys);
    my $parentKey = makeIdentifier(@keys);

    # Remove element from parent.  Parent must exist if the child exists.
    my $parentElem = $self->getInternal($parentKey, 0);
    $parentElem->removeChild($oldElem);
    $oldElem->dispose();

    # Conservatively clear last element cache
    undef $self->{LAST_KEY};
    undef $self->{LAST_ELEMENT};
}

sub removeAttribute($$) {
    my($self) = shift;
    my($key, $attribute) = @_;

    if( $key eq "" ) {
	# Warning, cannot change the root element
	p_warning("DOMAccess.removeAttribute: Cannot change attribute of null element.  Set aborted.\n");
	return;
    }

    if( $attribute eq "" ) {
	# Warning, cannot change the root element
	p_warning("DOMAccess.removeAttribute: Cannot change null attribute.  Set aborted.\n");
	return;
    }

    # Get the element
    my $e = $self->getInternal($key, 0);

    if( !defined($e) ) {
	# Warning, cannot change an attribute for an element that does not exist
	p_warning("DOMAccess.removeAttribute: Element '$key' does not exist.  Cannot remove attribute.\n");
	return;
    }
	
    my $attr = $e->removeAttribute($attribute);
    $attr->dispose();
}

# Dispose objects when done.  Allows DOM object to be garbage 
# collected. 
#
# This function should be called when done with a DOMAccess object
# otherwise memory will be leaked as the DOM data structure has circular
# references.
#
# This function does not need to be called if the DOMAccess object
# is not a subtree.  However it doesn't hurt.
#
# dispose() only queues objects; 
# we periodically call execDispose() to dispose of them en masse.

sub LeakTrackingAction($) {
    my ($action) = @_;

    if ($action eq "start") {
	$gTrackLeaks = 1;
    }

    if ($action eq "stop") {
	$gTrackLeaks = 0;
    }
}

sub allocateObj($$)
{
    my ($obj, $caller) = @_;

    if (!$gTrackLeaks || !$obj) {
	return;
    }

    if (defined($gAllocatedObjs{int($obj)})) {
	&VMware::VMServerd::Warning("DOMAccess::allocateObj: Already allocated.\n");
	return;
    }

    $gAllocatedObjs{int($obj)} = $caller;
}

sub freeObj($) 
{
    my ($obj) = @_;

    if (!$gTrackLeaks || !$obj) {
	return;
    }

    if (defined($gAllocatedObjs{int($obj)})) {
	delete($gAllocatedObjs{int($obj)});
	return;
    }

    &VMware::VMServerd::Panic("Freeing an object that was not allocated.\n");
}


sub dumpLeaks()
{
    if (!$gTrackLeaks) {
	return;
    }

    for my $k (keys(%gAllocatedObjs)) {
	&VMware::VMServerd::Warning("Potential leak of object " .
				    $k . " allocated at " . 
				    $gAllocatedObjs{$k} . "\n");
    }
}

sub dispose() {
    my $self = shift;

    freeObj($self);
    if( !$self->isSubTree() ) {
        push @disposeList, $self->getDocument();
        if (scalar(@disposeList) > $MAX_DISPOSE_LIST) {
            execDispose();
        }
    }
}

sub execDispose () {
    my $count = 0;
    while( my $obj = shift @disposeList ) {
        $obj->dispose();
        $count++;
    }

    if (($gIterCount % $gLeakDumpPeriod) == 0) {
	dumpLeaks();
    }
    $gIterCount++;

    return $count;
}

# Function to print the decoded XML string in a nice format
sub prettyPrint() {
    my $self = shift;
    my $e = $self->getRootElement();

    return(_prettyPrint($e));
}

# Internal function used to print the decoded XML string in a nice format
sub _prettyPrint {
    my $element = shift;
    my $depth = shift;
    my $INDENT = "    ";
    my $indent = "";
    my $s = "";
    my $i;

    if( !defined($depth) ) {
	$depth = 0;
    }

    for($i = 0; $i < $depth; $i++) {
	$indent .= $INDENT;
    }

    my $attr = "";
    my $attributes = $element->getAttributes();
    for( $i = 0; $i < $attributes->getLength(); $i++ ) {
	my $attribute = $attributes->item($i);
	$attr .= " " . $attribute->getName() . "=\"" . decodeInternal($attribute->getValue()) . "\"";
    }
    my $name = $element->getTagName();
    $s .= $indent . "<" . $name . $attr . ">";

    my $recursed = 0;
    my @nodes = $element->getChildNodes();
    for my $node (@nodes) {
	if($node->getNodeType() == XML::DOM::ELEMENT_NODE) {
	    $s .= "\n";
	    $s .= _prettyPrint($node, $depth + 1);
	    $recursed = 1;
	} elsif($node->getNodeType() == XML::DOM::TEXT_NODE) {
	    $s .= decodeInternal($node->toString());
	}
    }

    if( $recursed ) {
	$s .= "\n$indent";
    }

    $s .= "</$name>";

    return($s);
}

#
# ENCODING SECTION
#
# Following are DOMAccess string encoding/decoding routines.
#
# The goal of DOMAccess is 2 fold:
#
# 1. DOMAccess provides an abstraction where any data can be stored
#    and retrieved from in the object.  Whatever data is stored can be
#    directly accessed by a single query with a key.  Unlike standard
#    DOM, the caller does not do any tree traversals.  DOMAccess will
#    do the traversals automatically.
#
# 2. The XML that is produced by DOMAccess is well-formed XML that
#    uses only valid XML characters that can be used to build other
#    DOMAccess objects.  Ideally, the XML would be a perfect serialization
#    of the DOMAccess object such that a get on a DOMAccess object
#    that was created from XML would always result in the same data
#    that was set on the DOMAccess object used to create the XML.
#
#    However, since XML doesn't allow certain character codes, it
#    is not possible to use both XML and have it be a perfect
#    serialization without more dramatic escaping that what is used
#    here.  Furthermore, the XML was intended to be passed to third
#    party XML modules which are not aware of the special escaping
#    used to represent DOMAccess.  As a result, we drop the feature
#    of perfect serialization and settle for creating well-formed
#    XML.  This is accomplished by filtering out characters that
#    would cause a standard XML parser to choke.
#
# The DOMAccess encoding scheme makes it so that no data contains any
# syntactically significant characters.  For the DOM class upon which
# the DOMAccess class depends, the only significant characters are
# '<' and '&'.
#
# We use an escaping scheme in which we use '%' to indicate the
# start of an escape sequence and ';' to indicate the end of an escape
# sequence.  Each character that is to be escaped is mapped to another
# set of characters using the character's code value.  For example,
# '&' is escaped to '%#38;', '<' is escaped to '%#60;', and '%' itself
# is escaped to '%#37;'.
#
# This encoding scheme is applied to non-printable characters such as
# the characters with code > 127 and most characters < 32.  One thing
# to note about this coding scheme is that the scheme very much resembles
# how XML represents character references.  The main difference being
# that a '%' is used to indicate a special entity instead of '&'.
#
# The '%' character was chosen to ensure that the DOMAccess encoding
# scheme uses characters that are not significant to DOM.  Thus, no
# further escaping will be done on any characters of a DOMAccess 
# escape sequence.  Furthermore, the escaping can be done safely on
# the entire XML string without having to worry about the structure
# of the document.
#
# There are a few rules for how this encoding scheme works:
#
# 1. Data that is passed into DOMAccess can be retrieved as is via
#    the setValue/getValue functions.  No interpretation will be
#    done on any of the data.  What you put in is what you get out.
#
# 2. Well-formed XML can be generated from DOMAccess via the
#    toXmlString function.
#
# 3. A DOMAccess object can be built from well-formed XML via the
#    newXML function.
#
# 4. Binary data is the preferred form of data to be passed into
#    DOMAccess although it is possible to use character references
#    in the XML string that is passed into newXML.  However, these
#    character references should not refer to a character > 255 as
#    DOMAccess is meant to be encoding blind.
#
# The way this encoding scheme is applied is 
#
# newXML: uses encodeAsXml()
#   1. Filters bad binary characters and bad character references
#      via filterString(1).
#   2. Escapes all non-printable characters via escapeXml().
#   3. Character references are automatically interpreted by the
#      underlying XML parser as are the '&lt;' and '&amp;' entities.
#
# setValue: uses encodeInternal()
#   1. Filters bad binary characters via filterString(0).  Character
#      references are left unmodified.
#   2. Escapes all non-printable characters and the internal escape
#      characters '%', '&', and "<' via escapeInternal().  These
#      characters are escaped as '%#37;', '%#38;', and '%#60;'
#
# getValue: uses decodeInternal()
#   1. Converts '&lt' and '&amp' to '&' and '<'.  The underlying DOM
#      module always escapes these characters automatically.
#   2. Decodes XML character references to their binary forms via
#      unescapeXmlCharRef().  Done because DOM always represents some
#      characters via a character reference.
#   3. Unescapes all characters of the form '%#XXX;' including
#      non-printable characters and the internal escape characters.
#      This is done via unescape().
#
# toXmlString: uses decodeAsXml()
#   1. Converts '%#38;' and '%#60;' into the more common XML form
#      of '&amp;' and '&lt;'.  Note that '%#37;' ('%') does not
#      need to be unescaped here.
#   2. Decodes XML character references to their binary forms via
#      unescapeXmlCharRef().  Done because DOM always represents some
#      characters via a character reference.
#   3. Unescape non-printable characters via unescape().  Any '%#37;'
#      that are escaped are unescaped here.  Note that this final
#      unescaping must happen after '%#38;' and '%#60;' are converted.
#      This is done via unescape().
#

# Encodes a string that is received through the binary set interface.
sub encodeInternal($) {
    my ($src) = @_;
    $src = filterString($src, 0);
    $src = escapeInternal($src);
    return $src;
}

# Encodes an XML string that is received through the XML interface.
sub encodeAsXml($) {
   my ($src) = @_;
   $src = filterString($src, 1);
   $src = escapeXml($src);
   return $src;
}

# Decodes a string to be used externally.
sub decodeInternal($) {
   my ($src) = @_;

   # The DOM class will always turn '&' and '<' characters into '&amp;' and
   # '&lt;' so we convert them back.  It does this to certain unprintable
   # characters too so we unescape these character references.
   $src =~ s/(&amp;)|(&lt;)/defined($1) ? '&' : '<'/egs;
   $src = unescapeXmlCharRef($src);

   return unescape($src);
}

# Decode the string to be used for external XML parsers.
sub decodeAsXml($) {
   my ($src) = @_;

   # When decoding the text as XML, we should make sure that '&' and '<' are
   # escaped properly or else the next XML parser will choke.
   $src =~ s/%\#((38)|(60));/defined($2) ? '&amp;' : '&lt;'/egs;

   $src = unescapeXmlCharRef($src);

   return unescape($src);
}

# Escape the set of non-printable characters.
sub escapeXml {
   my $s = shift;
   return _escape($s, \&isPrintableChar);
}

# Escape the set of non-printable characters and the set of characters
# used internally by DOMAccess.
sub escapeInternal {
   my $s = shift;
   return _escape($s, \&isInternalChar);
}

# Escape characters by converting a character with an ASCII code to
# the form %#code;.  The function $func indicates whether a character
# should be escaped.
sub _escape {
   my ($s, $func) = @_;
   my @inList = unpack('C*', $s);
   my $out = '';

   while (scalar(@inList)) {
      my $c = shift(@inList);
      if (!&{$func}(chr($c))) {
         $out .= "%#$c;";
      } else {
         $out .= chr($c);
      }
   }
   return $out;
}

# Unescape characters represented as %#code; where code is the ASCII
# code for the character.
sub unescape {
   my $s = shift;

   $s =~ s/(%\#(\d+);)/chr($2)/egs;
   return $s;
}

# Convert REAL XML character references in a string to their binary form
# Note no codes > 255 should be passed in.
sub unescapeXmlCharRef($) {
   my ($src) = @_;

   # Note that it operates on char refs beginning with '&'
   $src =~ s/&\#(([0-9]+)|(x[0-9a-fA-F]+));/
      defined($2) ? chr($2) : chr(hex($3)) /egs;
   return $src;
}

######################################################################
# Character maps used in escaping and character filtering            #
######################################################################

# The list of printable characters that will not be escaped.  Note
# that this set consists consists of the printable characters.
my @NonEscapeXmlCharList = ( 9, 10, 13, 32..127 );
my %NonEscapeXmlCharHash = map { $_ => 1 } @NonEscapeXmlCharList;

sub isPrintableChar($) {
   my ($c) = @_;
   return $NonEscapeXmlCharHash{ord($c)} ? 1 : 0;
}

# The list of characters that will not be escaped.  Note that this set
# consists consists of the printable characters minus '&<%' all of
# which are significant escape characters for internal DOMAccess
# representation.
my @NonEscapeInternalCharList = ( 9, 10, 13, 32..36, 39..59, 61..127 );
my %NonEscapeInternalCharHash = map { $_ => 1 } @NonEscapeInternalCharList;

sub isInternalChar($) {
   my ($c) = @_;
   return $NonEscapeInternalCharHash{ord($c)} ? 1 : 0;
}


# The list of characters that do not belong in the XML character set.
# These characters should not be passed into an XML parser.  This set
# of characters is used by filterString.
my @NonXmlCharCodeList = ( 0..8, 11, 12, 14..31 );
my %NonXmlCharCodeHash = map { $_ => 1 } @NonXmlCharCodeList;
my $NonXmlCharCodeMatchStr = ("(" .
                              join("|",
                                   map { sprintf("\\x%.2X", $_) } @NonXmlCharCodeList) .
                              ")");
my $NonXmlCharCodeMatchEscapedStr = ("(" .
                                     join("|", map { "(&#$_;)" } @NonXmlCharCodeList) .
                                     ")");

sub isValidXmlChar($) {
   my ($c) = @_;
   # The hash uses reverse logic to allow for a sparse table.
   return $NonXmlCharCodeHash{ord($c)} ? 0 : 1;
}

# Takes in an unencoded string and filters out characters that are not
# valid XML characters.  Use this function to guard XML parsers from choking
# on invalid, malformed XML.
#
# This function takes an argument indicating whether it should filter out
# character references that refer to invalid characters.
sub filterString($$) {
   my ($src, $charref) = @_;

   # Strip out characters in the ASCII ranges 0-9,11-12,14-31
   $src =~ s/$NonXmlCharCodeMatchStr//g;

   if ($charref) {
      # Strip out the same set of characters represented as an escape sequence
      $src =~ s/$NonXmlCharCodeMatchEscapedStr//g;
   }

   return $src;
}


#####################################################################
# Methods not intended for use by user                              #
#####################################################################

# Breaks the identifier down into an array of keys
sub parseIdentifier($) {
    my ($key) = @_;
    my @keys = split(/\./, $key);

    my @identifiers;

    my $k;
    foreach $k (@keys) {
	my $id = {};
	my $base = $k;
	my $index = 0;

	if( $k =~ /(.+)\[(\d*)\]$/ ) {
	    $base = $1;
	    if( defined($2) && $2 ne "" ) {
		$index = $2;
	    }
	}

	$id->{BASE} = $base;
	$id->{INDEX} = $index;

	push(@identifiers, $id);
    }

    return(@identifiers);
}

sub makeIdentifier(@) {
    my @identifiers = @_;
    if( $#identifiers == -1 ) {
	return("");
    }

    my $s = $identifiers[0]->{BASE};

    my $i;
    for( $i = 1; $i <= $#identifiers; $i++ ) {
	$s .= "." . $identifiers[$i]->{BASE};

	# Add the numerical modifier if it is greater than 0.  
	if( $identifiers[$i]->{INDEX} > 0 ) {
	    $s .= "[" . $identifiers[$i]->{INDEX} . "]";
	}
    }

    return($s);
}


sub p_warning($) {
    my ($warning) = @_;

    if( $DOMDOC_WARNING ) {
#	print STDERR "$warning";
	if( $DOMDOC_WARNING_FATAL ) {
	    Carp::croak($warning); #Backtrace w/ die
	} else {
            Carp::cluck($warning); #Backtrace w/o die
        }
    }
}

sub isSubTree() {
    my ($self) = shift;
    if ( !defined($self->{IS_SUBTREE}) ) {
        # this caching doesn't seem to have much effect, 
        # but well, it doesn't hurt -- mgu
        $self->{IS_SUBTREE} = $self->{DOC}->getNodeType() == XML::DOM::ELEMENT_NODE;
    }
    return $self->{IS_SUBTREE};
}

# Added to detect memory leaks.
#my $DESTROY_COUNT = 1;
#sub DESTROY {
#    my $self = shift;
#    warn "DESTROYING ($DESTROY_COUNT) $self";
#    $DESTROY_COUNT++;
#}


1;

