edoc-applet/com/exploringxml/xml/Xparse.java - view

File: [Repository] / edoc-applet / com / exploringxml / xml / Xparse.java
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Jun 17 10:38:55 2003 UTC (21 years ago) by rogo
Branches: vend, MAIN
CVS tags: start, HEAD

First checkin.

/* Copyright (c) 2000 Michael Claßen <mclassen@internet.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * $Id: Xparse.java,v 1.1.1.1 2003/06/17 10:38:55 rogo Exp $
 */

// Derived from Javascript version:

// Ver .91 Feb 21 1998
//////////////////////////////////////////////////////////////
//
// Copyright 1998 Jeremie
// Free for public non-commercial use and modification
// as long as this header is kept intact and unmodified.
// Please see http://www.jeremie.com for more information
// or email jer@jeremie.com with questions/suggestions.
//
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
////////// Simple XML Processing Library //////////////////////
///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////
//// Fully complies to the XML 1.0 spec
//// as a well-formed processor, with the
//// exception of full error reporting and
//// the document type declaration(and it's
//// related features, internal entities, etc).
///////////////////////////////////////////////////////////////

package com.exploringxml.xml;

import java.util.Hashtable;

/**
 * Simple XML parser derived from the XParse Javascript parser;
 * Please see http://www.jeremie.com for more information on this.
 * Quoting Jeremie:
 * "Fully complies to the XML 1.0 spec
 * as a well-formed processor, with the
 * exception of full error reporting and
 * the document type declaration(and it's
 * related features, internal entities, etc)."
 *
 * <p>The parser works by repeatedly "eating" the front of the remaining
 * source string: each step recognises what the string starts with
 * (character data, PI, comment, CDATA, end tag or start tag), appends a
 * node to the current fragment and drops the consumed prefix.
 *
 * <p>{@link #count} and {@link #index} are static and are re-initialised by
 * every {@link #parse(String)} call, so two parses running at the same time
 * (even on different instances) would share them.
 *
 * @author Michael Claßen
 * @version $Revision: 1.1.1.1 $
 */
public class Xparse {

  /**
   * Helper mirroring the common use of Javascript's
   * <code>substring(start, end)</code>: it does not fail when
   * <code>end</code> lies beyond the end of the string but returns the
   * remainder of the string instead. MC20001214
   *
   * <p>Every caller in this class passes an END index (exclusive), not a
   * length. The bounds test therefore compares <code>end</code> alone with
   * the string length; comparing <code>start + end</code> (as an earlier
   * revision did) returned the whole remainder for short inputs such as
   * <code>"&lt;a/&gt;"</code>.
   *
   * @param s     the string to slice
   * @param start the starting position within s (inclusive)
   * @param end   the end position within s (exclusive); may exceed the length
   * @return the substring
   * @throws StringIndexOutOfBoundsException if start is negative, greater
   *         than the string length, or greater than an in-range end
   */
  private String substring(String s, int start, int end) {
    if (end < s.length()) {
      return s.substring(start, end);
    }
    return s.substring(start);
  }

  /** an internal fragment that is passed between functions */
  class Frag {
    /** the source text that has not been consumed yet */
    public String str;
    /** the nodes collected so far for this nesting level */
    public JSArray ary;
    /** name of the element whose end tag closes this fragment ("" at top level) */
    public String end;

    public Frag() {
      this.str = "";
      this.ary = new JSArray();
      this.end = "";
    }
  }

  // global vars to track element UID's for the index
  // NOTE(review): nothing in this file reads or increments them apart from
  // parse(); presumably Node's factory methods do -- confirm in Node.java.
  static int count = 0;
  static JSArray index = new JSArray();

  /** when true, {@link #entity(String)} decodes the five predefined XML entities */
  public boolean changeEntities = false; // by rogo

  /**
   * Main public function that is called to
   * parse the XML string and return a root element object
   *
   * @param src the XML source text
   * @return the parsed XML's root Node
   */
  public Node parse(String src) {
    // start every parse with fresh static bookkeeping
    count = 0;
    index = new JSArray();

    Frag frag = new Frag();

    // remove bad \r characters and the prolog
    frag.str = prolog(src);

    // create a root element to contain the document
    Node root = Node.createRootelement();
    root.name = "ROOT";

    // main recursive function to process the xml
    frag = compile(frag);

    // all done, lets return the root element + index + document
    root.contents = frag.ary;
    root.index = index;
    // hand the index over to the root and detach the static reference
    index = new JSArray();
    return root;
  }

  /**
   * transforms raw text input into a multilevel JSArray
   *
   * <p>Loops until the fragment's text is used up or the end tag named by
   * <code>frag.end</code> is reached; in the latter case the text following
   * the end tag is left in <code>frag.str</code> for the caller.
   *
   * @param frag the input fragment
   * @return the output fragment
   */
  Frag compile(Frag frag) {
    // keep circling and eating the str
    while (true) {
      // when the str is empty, return the fragment
      if (frag.str.length() == 0) {
        return frag;
      }

      int tagStart = frag.str.indexOf("<");

      if (tagStart != 0) {
        // theres a chunk of characters here, store it and go on
        int thisary = frag.ary.length();
        frag.ary.setElementAt(Node.createChardata(), thisary);
        if (tagStart == -1) {
          // no further markup: the rest of the text is character data
          frag.ary.setElementAt(entity(frag.str), thisary, JSArray.Value);
          frag.str = "";
        } else {
          frag.ary.setElementAt(entity(substring(frag.str, 0, tagStart)), thisary, JSArray.Value);
          frag.str = substring(frag.str, tagStart, frag.str.length());
        }
      } else if (substring(frag.str, 1, 2).equals("?")) {
        // determine what the next section is, and process it
        frag = tagPI(frag);
      } else if (substring(frag.str, 1, 4).equals("!--")) {
        frag = tagComment(frag);
      } else if (substring(frag.str, 1, 9).equals("![CDATA[")) {
        frag = tagCData(frag);
      } else {
        // the characters right after '<': long enough to hold "/name>"
        String candidate = substring(frag.str, 1, frag.end.length() + 3);
        String endName = "/" + frag.end;

        if (candidate.equals(endName + ">")) {
          // found the end of the current tag, end the recursive process and return
          frag.str = substring(frag.str, frag.end.length() + 3, frag.str.length());
          frag.end = "";
          return frag;
        }

        if (strip(candidate).equals(endName)) {
          // end tag with white space before '>' (e.g. "</a >"): skip through
          // the closing '>' so that it is not left behind as character data
          int gt = frag.str.indexOf(">");
          int resume = (gt == -1) ? frag.end.length() + 3 : gt + 1;
          frag.str = substring(frag.str, resume, frag.str.length());
          frag.end = "";
          return frag;
        }

        frag = tagElement(frag);
      }
    }
  }

  //// functions to process different tags

  /**
   * process an XML element
   *
   * <p>Expects <code>frag.str</code> to start with the element's start tag.
   * For a non-empty element the contents are parsed recursively via
   * {@link #compile(Frag)}.
   *
   * @param frag the input fragment
   * @return the output fragment
   * @throws StringIndexOutOfBoundsException if the tag has no closing '&gt;'
   */
  Frag tagElement(Frag frag) {
    // initialize some temporary variables for manipulating the tag
    int close = frag.str.indexOf(">");
    boolean empty = substring(frag.str, close - 1, close).equals("/");
    if (empty) {
      // exclude the '/' of "/>" from the start tag text
      close -= 1;
    }

    // split up the name and attributes
    String starttag = normalize(substring(frag.str, 1, close));
    int nextspace = starttag.indexOf(" ");
    String attribs = "";
    String name;
    if (nextspace != -1) {
      name = starttag.substring(0, nextspace);
      attribs = starttag.substring(nextspace + 1, starttag.length());
    } else {
      name = starttag;
    }

    int thisary = frag.ary.length();
    frag.ary.setElementAt(Node.createElement(), thisary);
    frag.ary.setElementAt(strip(name), thisary, JSArray.Name);
    if (attribs.length() > 0) {
      frag.ary.setElementAt(attribution(attribs), thisary, JSArray.Attributes);
    }

    if (!empty) {
      // !!!! important,
      // take the contents of the tag and parse them
      Frag contents = new Frag();
      contents.str = substring(frag.str, close + 1, frag.str.length());
      contents.end = name;
      contents = compile(contents);
      frag.ary.setElementAt(contents.ary, thisary, JSArray.Contents);
      // continue after whatever the nested parse consumed
      frag.str = contents.str;
    } else {
      // skip the two characters of "/>"
      frag.str = substring(frag.str, close + 2, frag.str.length());
    }
    return frag;
  }

  /**
   * process an XML processing instruction (PI)
   *
   * @param frag the input fragment, starting with "&lt;?"
   * @return the output fragment
   */
  Frag tagPI(Frag frag) {
    int close = frag.str.indexOf("?>");
    // the text between "<?" and "?>"
    String val = substring(frag.str, 2, close);
    int thisary = frag.ary.length();
    frag.ary.setElementAt(Node.createPi(), thisary);
    frag.ary.setElementAt(val, thisary, JSArray.Value);
    frag.str = substring(frag.str, close + 2, frag.str.length());
    return frag;
  }

  /**
   * process an XML comment
   *
   * @param frag the input fragment, starting with "&lt;!--"
   * @return the output fragment
   */
  Frag tagComment(Frag frag) {
    int close = frag.str.indexOf("-->");
    // the text between "<!--" and "-->"
    String val = substring(frag.str, 4, close);
    int thisary = frag.ary.length();
    frag.ary.setElementAt(Node.createComment(), thisary);
    frag.ary.setElementAt(val, thisary, JSArray.Value);
    frag.str = substring(frag.str, close + 3, frag.str.length());
    return frag;
  }

  /**
   * process XML character data (CDATA)
   *
   * <p>The section's text is stored verbatim (no entity replacement) in a
   * character data node.
   *
   * @param frag the input fragment, starting with "&lt;![CDATA["
   * @return the output fragment
   */
  Frag tagCData(Frag frag) {
    int close = frag.str.indexOf("]]>");
    // the text between "<![CDATA[" and "]]>"
    String val = substring(frag.str, 9, close);
    int thisary = frag.ary.length();
    frag.ary.setElementAt(Node.createChardata(), thisary);
    frag.ary.setElementAt(val, thisary, JSArray.Value);
    frag.str = substring(frag.str, close + 3, frag.str.length());
    return frag;
  }

  /**
   * util for element attribute parsing
   *
   * <p>Reads <code>name = 'value'</code> / <code>name = "value"</code> pairs
   * from the front of the string until no '=' is left. Parsing also stops,
   * returning what has been collected so far, when a value is not quoted or
   * its closing quote is missing.
   *
   * @param str the attribute part of a start tag
   * @return a Hashtable of all of the keys = values
   */
  Hashtable attribution(String str) {
    Hashtable all = new Hashtable();
    while (true) {
      int eq = str.indexOf("=");
      if (str.length() == 0 || eq == -1) {
        return all;
      }

      int id1 = str.indexOf("\'");
      int id2 = str.indexOf("\"");
      if (id1 == -1 && id2 == -1) {
        // value is not quoted: nothing more can be read reliably
        return all;
      }

      // the value is delimited by whichever quote character comes first
      int ids;
      String id;
      if (id2 == -1 || (id1 != -1 && id1 < id2)) {
        ids = id1;
        id = "\'";
      } else {
        ids = id2;
        id = "\"";
      }

      int nextid = str.indexOf(id, ids + 1);
      if (nextid == -1) {
        // unterminated value
        return all;
      }

      String val = str.substring(ids + 1, nextid);
      String name = strip(str.substring(0, eq));
      all.put(name, entity(val));
      // continue behind the closing quote
      str = str.substring(nextid + 1, str.length());
    }
  }

  /**
   * util to remove \r characters from input string
   * and to cut off the prolog (XML declaration and DOCTYPE)
   *
   * @param str the raw XML source
   * @return the xml string without a prolog
   */
  String prolog(String str) {
    JSArray a = new JSArray();

    // turn "\r\n" and lone "\r" into "\n"
    a.split(str, "\r\n");
    str = a.join("\n");
    a.split(str, "\r");
    str = a.join("\n");

    // startsWith(prefix, offset) is false for a negative or too large offset,
    // so input without '<' (or with too little text after it) is left alone
    int start = str.indexOf("<");
    if (str.startsWith("<?x", start) || str.startsWith("<?X", start)) {
      int close = str.indexOf("?>", start);
      str = str.substring(close + 2, str.length());
    }

    start = str.indexOf("<!DOCTYPE");
    if (start != -1) {
      int close = str.indexOf(">", start) + 1;
      int dp = str.indexOf("[", start);
      if (dp < close && dp != -1) {
        // internal subset present: the declaration ends at "]>"
        close = str.indexOf("]>", start) + 2;
      }
      str = str.substring(close, str.length());
    }
    return str;
  }

  /**
   * util to remove white characters from input string
   *
   * @param str the string to strip
   * @return the string without newline, space and tab characters
   */
  String strip(String str) {
    JSArray parts = new JSArray();
    parts.split(str, "\n");
    str = parts.join("");
    parts.split(str, " ");
    str = parts.join("");
    parts.split(str, "\t");
    str = parts.join("");
    return str;
  }

  /**
   * util to replace white characters in input string
   *
   * @param str the string to normalize
   * @return the string with every newline and tab replaced by a space
   */
  String normalize(String str) {
    JSArray parts = new JSArray();
    parts.split(str, "\n");
    str = parts.join(" ");
    parts.split(str, "\t");
    str = parts.join(" ");
    return str;
  }

  /**
   * util to replace internal entities in input string
   *
   * <p>Does nothing unless {@link #changeEntities} is set. The ampersand
   * entity is handled last so that text such as <code>&amp;amp;lt;</code>
   * is decoded only once.
   *
   * @param str the string possibly containing entity references
   * @return string with replaced entities
   */
  String entity(String str) {
    if (!changeEntities) {
      return str; // by rogo
    }
    JSArray parts = new JSArray();
    parts.split(str, "&lt;");
    str = parts.join("<");
    parts.split(str, "&gt;");
    str = parts.join(">");
    parts.split(str, "&quot;");
    str = parts.join("\"");
    parts.split(str, "&apos;");
    str = parts.join("\'");
    parts.split(str, "&amp;");
    str = parts.join("&");
    return str;
  }
}