/* Copyright (c) 2000 Michael Claßen <mclassen@internet.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Id: Xparse.java,v 1.1.1.1 2003/06/17 10:38:55 rogo Exp $
*/
// Derived from Javascript version:
// Ver .91 Feb 21 1998
//////////////////////////////////////////////////////////////
//
// Copyright 1998 Jeremie
// Free for public non-commercial use and modification
// as long as this header is kept intact and unmodified.
// Please see http://www.jeremie.com for more information
// or email jer@jeremie.com with questions/suggestions.
//
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
////////// Simple XML Processing Library //////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
//// Fully complies to the XML 1.0 spec
//// as a well-formed processor, with the
//// exception of full error reporting and
//// the document type declaration(and it's
//// related features, internal entities, etc).
///////////////////////////////////////////////////////////////
package com.exploringxml.xml;
import java.util.Hashtable;
/**
* Simple XML parser derived from the XParse Javascript parser;
* Please see http://www.jeremie.com for more information on this.
* Quoting Jeremie:
* "Fully complies to the XML 1.0 spec
* as a well-formed processor, with the
* exception of full error reporting and
* the document type declaration(and it's
* related features, internal entities, etc)."
*
* @author Michael Claßen
* @version $Revision: 1.1.1.1 $
*/
public class Xparse {
/**
* Helper function for matching Javascript's definition
* of the substring function to not cause an IndexOutOfBoundsException
* when length exceeds string length but return the remainder
* of the string instead MC20001214
*
* @param s the string to slice
* @param start the starting position within s
* @param length the number of characters to slice
* @return the substring
*/
private String substring(String s, int start, int length) {
if (s.length() > start + length)
return s.substring(start, length);
else
return s.substring(start);
}
/** an internal fragment that is passed between functions
*/
class Frag {
public String str;
public JSArray ary;
public String end;
public Frag()
{
this.str = new String();
this.ary = new JSArray();
this.end = new String();
}
}
// global vars to track element UID's for the index
static int count = 0;
static JSArray index = new JSArray();
public boolean changeEntities=false;// by rogo
/**
* Main public function that is called to
* parse the XML string and return a root element object
*
* @param src the object's index in the array
* @return the parsed XML's root Node
*/
public Node parse(String src) {
count = 0;
index = new JSArray();
Frag frag = new Frag();
// remove bad \r characters and the prolog
frag.str = prolog(src);
// create a root element to contain the document
Node root = Node.createRootelement();
root.name="ROOT";
// main recursive function to process the xml
frag = compile(frag);
// all done, lets return the root element + index + document
root.contents = frag.ary;
root.index = index;
index = new JSArray();
return root;
}
/**
* transforms raw text input into a multilevel JSArray
*
* @param frag the input fragment
* @return the output fragment
*/
Frag compile(Frag frag) {
// keep circling and eating the str
while(true)
{
// when the str is empty, return the fragment
if(frag.str.length() == 0)
{
return frag;
}
int TagStart = frag.str.indexOf("<");
if(TagStart != 0)
{
// theres a chunk of characters here, store it and go on
int thisary = frag.ary.length();
frag.ary.setElementAt(Node.createChardata(), thisary);
if(TagStart == -1)
{
frag.ary.setElementAt(entity(frag.str), thisary, JSArray.Value);
frag.str = "";
}
else
{
frag.ary.setElementAt(entity(substring(frag.str,0,TagStart)), thisary, JSArray.Value);
frag.str = substring(frag.str,TagStart,frag.str.length());
}
}
else
{
// determine what the next section is, and process it
if(substring(frag.str,1,2).equals("?"))
{
frag = tagPI(frag);
}
else
{
if(substring(frag.str,1,4).equals("!--"))
{
frag = tagComment(frag);
}
else
{
if(substring(frag.str,1,9).equals("![CDATA["))
{
frag = tagCData(frag);
}
else
{
if(substring(frag.str,1,frag.end.length() + 3).equals("/" + frag.end + ">") || strip(substring(frag.str,1,frag.end.length() + 3)).equals("/" + frag.end))
{
// found the end of the current tag, end the recursive process and return
frag.str = substring(frag.str,frag.end.length() + 3,frag.str.length());
frag.end = "";
return frag;
}
else
{
frag = tagElement(frag);
}
}
}
}
}
}
//MC return "";
}
//// functions to process different tags
/**
* process an XML element
*
* @param frag the input fragment
* @return the output fragment
*/
Frag tagElement(Frag frag)
{
// initialize some temporary variables for manipulating the tag
int close = frag.str.indexOf(">");
boolean empty = (substring(frag.str,close - 1, close).equals("/"));
if(empty)
{
close -= 1;
}
// split up the name and attributes
String starttag = normalize(substring(frag.str,1,close));
int nextspace = starttag.indexOf(" ");
String attribs = new String();
String name = new String();
if(nextspace != -1)
{
name = starttag.substring(0,nextspace);
attribs = starttag.substring(nextspace + 1,starttag.length());
}
else
{
name = starttag;
}
int thisary = frag.ary.length();
frag.ary.setElementAt(Node.createElement(), thisary);
frag.ary.setElementAt(strip(name), thisary, JSArray.Name);
if(attribs.length() > 0)
{
frag.ary.setElementAt(attribution(attribs), thisary, JSArray.Attributes);
}
if(!empty)
{
// !!!! important,
// take the contents of the tag and parse them
Frag contents = new Frag();
contents.str = substring(frag.str,close + 1,frag.str.length());
contents.end = name;
contents = compile(contents);
frag.ary.setElementAt(contents.ary, thisary, JSArray.Contents);
frag.str = contents.str;
}
else
{
frag.str = substring(frag.str,close + 2,frag.str.length());
}
return frag;
}
/**
* process an XML processing instruction (PI)
*
* @param frag the input fragment
* @return the output fragment
*/
Frag tagPI(Frag frag)
{
int close = frag.str.indexOf("?>");
String val = substring(frag.str,2, close);
int thisary = frag.ary.length();
frag.ary.setElementAt(Node.createPi(), thisary);
frag.ary.setElementAt(val, thisary, JSArray.Value);
frag.str = substring(frag.str,close + 2, frag.str.length());
return frag;
}
/**
* process an XML comment
*
* @param frag the input fragment
* @return the output fragment
*/
Frag tagComment(Frag frag)
{
int close = frag.str.indexOf("-->");
String val = substring(frag.str,4, close);
int thisary = frag.ary.length();
frag.ary.setElementAt(Node.createComment(), thisary);
frag.ary.setElementAt(val, thisary, JSArray.Value);
frag.str = substring(frag.str,close + 3, frag.str.length());
return frag;
}
/**
* process XML character data (CDATA)
*
* @param frag the input fragment
* @return the output fragment
*/
Frag tagCData(Frag frag)
{
int close = frag.str.indexOf("]]>");
String val = substring(frag.str,9, close);
int thisary = frag.ary.length();
frag.ary.setElementAt(Node.createChardata(), thisary);
frag.ary.setElementAt(val, thisary, JSArray.Value);
frag.str = substring(frag.str,close + 3, frag.str.length());
return frag;
}
/**
* util for element attribute parsing
*
* @param attribute string
* @return an JSArray of all of the keys = values
*/
Hashtable attribution(String str)
{
Hashtable all = new Hashtable();
while(true)
{
int eq = str.indexOf("=");
if(str.length() == 0 || eq == -1)
{
return all;
}
int id1 = str.indexOf("\'");
int id2 = str.indexOf("\"");
int ids = 0; //MC = new Number();
String id = new String();
if((id1 < id2 && id1 != -1) || id2 == -1)
{
ids = id1;
id = "\'";
}
if((id2 < id1 || id1 == -1) && id2 != -1)
{
ids = id2;
id = "\"";
}
int nextid = str.indexOf(id,ids + 1);
String val = str.substring(ids + 1,nextid);
String name = strip(str.substring(0,eq));
all.put(name, entity(val));
str = str.substring(nextid + 1,str.length());
}
//MC return "";
}
/**
* util to remove \r characters from input string
*
* @param attribute string
* @return the xml string without a prolog
*/
String prolog(String str)
{
JSArray a = new JSArray();
a.split(str, "\r\n");
str = a.join("\n");
a.split(str, "\r");
str = a.join("\n");
int start = str.indexOf("<");
if(str.substring(start,start + 3).equals("<?x") || str.substring(start,start + 3).equals("<?X") )
{
int close = str.indexOf("?>");
str = str.substring(close + 2,str.length());
}
start = str.indexOf("<!DOCTYPE");
if(start != -1)
{
int close = str.indexOf(">",start) + 1;
int dp = str.indexOf("[",start);
if(dp < close && dp != -1)
{
close = str.indexOf("]>",start) + 2;
}
str = str.substring(close,str.length());
}
return str;
}
/**
* util to remove white characters from input string
*
* @param string
* @return stripped string
*/
String strip(String str)
{
JSArray A = new JSArray();
A.split(str, "\n");
str = A.join("");
A.split(str, " ");
str = A.join("");
A.split(str, "\t");
str = A.join("");
return str;
}
/**
* util to replace white characters in input string
*
* @param string
* @return normalized string
*/
String normalize(String str)
{
JSArray A = new JSArray();
A.split(str, "\n");
str = A.join(" ");
A.split(str, "\t");
str = A.join(" ");
return str;
}
/**
* util to replace internal entities in input string
*
* @param string
* @return string with replaced entitities
*/
String entity(String str)
{
if(!changeEntities) return str;//by rogo
JSArray A = new JSArray();
A.split(str, "<");
str = A.join("<");
A.split(str, ">");
str = A.join(">");
A.split(str, """);
str = A.join("\"");
A.split(str, "'");
str = A.join("\'");
A.split(str, "&");
str = A.join("&");
return str;
}
}
// FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>