Annotation of FM2SQL/com/exploringxml/xml/Xparse.java, revision 1.1
1.1 ! rogo 1: /* Copyright (c) 2000 Michael Claßen <mclassen@internet.com>
! 2: *
! 3: * This program is free software; you can redistribute it and/or
! 4: * modify it under the terms of the GNU General Public License
! 5: * as published by the Free Software Foundation; either version 2
! 6: * of the License, or (at your option) any later version.
! 7: *
! 8: * This program is distributed in the hope that it will be useful,
! 9: * but WITHOUT ANY WARRANTY; without even the implied warranty of
! 10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 11: * GNU General Public License for more details.
! 12: *
! 13: * You should have received a copy of the GNU General Public License
! 14: * along with this program; if not, write to the Free Software
! 15: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
! 16: *
! 17: * $Id: Xparse.java,v 1.1 2000/04/03 07:43:37 classen Exp $
! 18: */
! 19:
! 20: // Derived from Javascript version:
! 21:
! 22: // Ver .91 Feb 21 1998
! 23: //////////////////////////////////////////////////////////////
! 24: //
! 25: // Copyright 1998 Jeremie
! 26: // Free for public non-commercial use and modification
! 27: // as long as this header is kept intact and unmodified.
! 28: // Please see http://www.jeremie.com for more information
! 29: // or email jer@jeremie.com with questions/suggestions.
! 30: //
! 31: ///////////////////////////////////////////////////////////////
! 32: ///////////////////////////////////////////////////////////////
! 33: ////////// Simple XML Processing Library //////////////////////
! 34: ///////////////////////////////////////////////////////////////
! 35: ///////////////////////////////////////////////////////////////
! 36: //// Fully complies to the XML 1.0 spec
! 37: //// as a well-formed processor, with the
! 38: //// exception of full error reporting and
! 39: //// the document type declaration(and it's
! 40: //// related features, internal entities, etc).
! 41: ///////////////////////////////////////////////////////////////
! 42:
! 43: package com.exploringxml.xml;
! 44:
! 45: import java.util.Hashtable;
! 46:
! 47: /**
! 48: * Simple XML parser derived from the XParse Javascript parser;
! 49: * Please see http://www.jeremie.com for more information on this.
! 50: * Quoting Jeremie:
! 51: * "Fully complies to the XML 1.0 spec
! 52: * as a well-formed processor, with the
! 53: * exception of full error reporting and
! 54: * the document type declaration(and it's
! 55: * related features, internal entities, etc)."
! 56: *
! 57: * @author Michael Claßen
! 58: * @version $Revision: 1.1 $
! 59: */
! 60: public class Xparse {
! 61:
! 62: /**
! 63: * Helper function for matching Javascript's definition
! 64: * of the substring function to not cause an IndexOutOfBoundsException
! 65: * when length exceeds string length but return the remainder
! 66: * of the string instead MC20001214
! 67: *
! 68: * @param s the string to slice
! 69: * @param start the starting position within s
! 70: * @param length the number of characters to slice
! 71: * @return the substring
! 72: */
! 73: private String substring(String s, int start, int length) {
! 74: if (s.length() > start + length)
! 75: return s.substring(start, length);
! 76: else
! 77: return s.substring(start);
! 78: }
! 79:
! 80: /** an internal fragment that is passed between functions
! 81: */
! 82: class Frag {
! 83: public String str;
! 84: public JSArray ary;
! 85: public String end;
! 86: public Frag()
! 87: {
! 88: this.str = new String();
! 89: this.ary = new JSArray();
! 90: this.end = new String();
! 91: }
! 92: }
! 93:
! 94: // global vars to track element UID's for the index
! 95: static int count = 0;
! 96: static JSArray index = new JSArray();
! 97: public boolean changeEntities=false;// by rogo
! 98: /**
! 99: * Main public function that is called to
! 100: * parse the XML string and return a root element object
! 101: *
! 102: * @param src the object's index in the array
! 103: * @return the parsed XML's root Node
! 104: */
! 105: public Node parse(String src) {
! 106:
! 107: count = 0;
! 108: index = new JSArray();
! 109:
! 110: Frag frag = new Frag();
! 111:
! 112: // remove bad \r characters and the prolog
! 113: frag.str = prolog(src);
! 114:
! 115: // create a root element to contain the document
! 116: Node root = Node.createRootelement();
! 117: root.name="ROOT";
! 118:
! 119: // main recursive function to process the xml
! 120: frag = compile(frag);
! 121:
! 122: // all done, lets return the root element + index + document
! 123: root.contents = frag.ary;
! 124: root.index = index;
! 125: index = new JSArray();
! 126: return root;
! 127: }
! 128:
! 129: /**
! 130: * transforms raw text input into a multilevel JSArray
! 131: *
! 132: * @param frag the input fragment
! 133: * @return the output fragment
! 134: */
! 135: Frag compile(Frag frag) {
! 136:
! 137: // keep circling and eating the str
! 138: while(true)
! 139: {
! 140: // when the str is empty, return the fragment
! 141: if(frag.str.length() == 0)
! 142: {
! 143: return frag;
! 144: }
! 145:
! 146: int TagStart = frag.str.indexOf("<");
! 147:
! 148: if(TagStart != 0)
! 149: {
! 150: // theres a chunk of characters here, store it and go on
! 151: int thisary = frag.ary.length();
! 152: frag.ary.setElementAt(Node.createChardata(), thisary);
! 153: if(TagStart == -1)
! 154: {
! 155: frag.ary.setElementAt(entity(frag.str), thisary, JSArray.Value);
! 156: frag.str = "";
! 157: }
! 158: else
! 159: {
! 160: frag.ary.setElementAt(entity(substring(frag.str,0,TagStart)), thisary, JSArray.Value);
! 161: frag.str = substring(frag.str,TagStart,frag.str.length());
! 162: }
! 163: }
! 164: else
! 165: {
! 166: // determine what the next section is, and process it
! 167: if(substring(frag.str,1,2).equals("?"))
! 168: {
! 169: frag = tagPI(frag);
! 170: }
! 171: else
! 172: {
! 173: if(substring(frag.str,1,4).equals("!--"))
! 174: {
! 175: frag = tagComment(frag);
! 176: }
! 177: else
! 178: {
! 179: if(substring(frag.str,1,9).equals("![CDATA["))
! 180: {
! 181: frag = tagCData(frag);
! 182: }
! 183: else
! 184: {
! 185: if(substring(frag.str,1,frag.end.length() + 3).equals("/" + frag.end + ">") || strip(substring(frag.str,1,frag.end.length() + 3)).equals("/" + frag.end))
! 186: {
! 187: // found the end of the current tag, end the recursive process and return
! 188: frag.str = substring(frag.str,frag.end.length() + 3,frag.str.length());
! 189: frag.end = "";
! 190: return frag;
! 191: }
! 192: else
! 193: {
! 194: frag = tagElement(frag);
! 195: }
! 196: }
! 197: }
! 198: }
! 199:
! 200: }
! 201: }
! 202: //MC return "";
! 203: }
! 204:
! 205: //// functions to process different tags
! 206:
! 207: /**
! 208: * process an XML element
! 209: *
! 210: * @param frag the input fragment
! 211: * @return the output fragment
! 212: */
! 213: Frag tagElement(Frag frag)
! 214: {
! 215: // initialize some temporary variables for manipulating the tag
! 216: int close = frag.str.indexOf(">");
! 217: boolean empty = (substring(frag.str,close - 1, close).equals("/"));
! 218: if(empty)
! 219: {
! 220: close -= 1;
! 221: }
! 222:
! 223: // split up the name and attributes
! 224: String starttag = normalize(substring(frag.str,1,close));
! 225: int nextspace = starttag.indexOf(" ");
! 226: String attribs = new String();
! 227: String name = new String();
! 228: if(nextspace != -1)
! 229: {
! 230: name = starttag.substring(0,nextspace);
! 231: attribs = starttag.substring(nextspace + 1,starttag.length());
! 232: }
! 233: else
! 234: {
! 235: name = starttag;
! 236: }
! 237:
! 238: int thisary = frag.ary.length();
! 239: frag.ary.setElementAt(Node.createElement(), thisary);
! 240: frag.ary.setElementAt(strip(name), thisary, JSArray.Name);
! 241: if(attribs.length() > 0)
! 242: {
! 243: frag.ary.setElementAt(attribution(attribs), thisary, JSArray.Attributes);
! 244: }
! 245: if(!empty)
! 246: {
! 247: // !!!! important,
! 248: // take the contents of the tag and parse them
! 249: Frag contents = new Frag();
! 250: contents.str = substring(frag.str,close + 1,frag.str.length());
! 251: contents.end = name;
! 252: contents = compile(contents);
! 253: frag.ary.setElementAt(contents.ary, thisary, JSArray.Contents);
! 254: frag.str = contents.str;
! 255: }
! 256: else
! 257: {
! 258: frag.str = substring(frag.str,close + 2,frag.str.length());
! 259: }
! 260: return frag;
! 261: }
! 262:
! 263: /**
! 264: * process an XML processing instruction (PI)
! 265: *
! 266: * @param frag the input fragment
! 267: * @return the output fragment
! 268: */
! 269: Frag tagPI(Frag frag)
! 270: {
! 271: int close = frag.str.indexOf("?>");
! 272: String val = substring(frag.str,2, close);
! 273: int thisary = frag.ary.length();
! 274: frag.ary.setElementAt(Node.createPi(), thisary);
! 275: frag.ary.setElementAt(val, thisary, JSArray.Value);
! 276: frag.str = substring(frag.str,close + 2, frag.str.length());
! 277: return frag;
! 278: }
! 279:
! 280: /**
! 281: * process an XML comment
! 282: *
! 283: * @param frag the input fragment
! 284: * @return the output fragment
! 285: */
! 286: Frag tagComment(Frag frag)
! 287: {
! 288: int close = frag.str.indexOf("-->");
! 289: String val = substring(frag.str,4, close);
! 290: int thisary = frag.ary.length();
! 291: frag.ary.setElementAt(Node.createComment(), thisary);
! 292: frag.ary.setElementAt(val, thisary, JSArray.Value);
! 293: frag.str = substring(frag.str,close + 3, frag.str.length());
! 294: return frag;
! 295: }
! 296:
! 297: /**
! 298: * process XML character data (CDATA)
! 299: *
! 300: * @param frag the input fragment
! 301: * @return the output fragment
! 302: */
! 303: Frag tagCData(Frag frag)
! 304: {
! 305: int close = frag.str.indexOf("]]>");
! 306: String val = substring(frag.str,9, close);
! 307: int thisary = frag.ary.length();
! 308: frag.ary.setElementAt(Node.createChardata(), thisary);
! 309: frag.ary.setElementAt(val, thisary, JSArray.Value);
! 310: frag.str = substring(frag.str,close + 3, frag.str.length());
! 311: return frag;
! 312: }
! 313:
! 314: /**
! 315: * util for element attribute parsing
! 316: *
! 317: * @param attribute string
! 318: * @return an JSArray of all of the keys = values
! 319: */
! 320: Hashtable attribution(String str)
! 321: {
! 322: Hashtable all = new Hashtable();
! 323: while(true)
! 324: {
! 325: int eq = str.indexOf("=");
! 326: if(str.length() == 0 || eq == -1)
! 327: {
! 328: return all;
! 329: }
! 330:
! 331: int id1 = str.indexOf("\'");
! 332: int id2 = str.indexOf("\"");
! 333: int ids = 0; //MC = new Number();
! 334: String id = new String();
! 335: if((id1 < id2 && id1 != -1) || id2 == -1)
! 336: {
! 337: ids = id1;
! 338: id = "\'";
! 339: }
! 340: if((id2 < id1 || id1 == -1) && id2 != -1)
! 341: {
! 342: ids = id2;
! 343: id = "\"";
! 344: }
! 345: int nextid = str.indexOf(id,ids + 1);
! 346: String val = str.substring(ids + 1,nextid);
! 347:
! 348: String name = strip(str.substring(0,eq));
! 349: all.put(name, entity(val));
! 350: str = str.substring(nextid + 1,str.length());
! 351: }
! 352: //MC return "";
! 353: }
! 354:
! 355: /**
! 356: * util to remove \r characters from input string
! 357: *
! 358: * @param attribute string
! 359: * @return the xml string without a prolog
! 360: */
! 361: String prolog(String str)
! 362: {
! 363: JSArray a = new JSArray();
! 364:
! 365: a.split(str, "\r\n");
! 366: str = a.join("\n");
! 367: a.split(str, "\r");
! 368: str = a.join("\n");
! 369:
! 370: int start = str.indexOf("<");
! 371: if(str.substring(start,start + 3).equals("<?x") || str.substring(start,start + 3).equals("<?X") )
! 372: {
! 373: int close = str.indexOf("?>");
! 374: str = str.substring(close + 2,str.length());
! 375: }
! 376: start = str.indexOf("<!DOCTYPE");
! 377: if(start != -1)
! 378: {
! 379: int close = str.indexOf(">",start) + 1;
! 380: int dp = str.indexOf("[",start);
! 381: if(dp < close && dp != -1)
! 382: {
! 383: close = str.indexOf("]>",start) + 2;
! 384: }
! 385: str = str.substring(close,str.length());
! 386: }
! 387: return str;
! 388: }
! 389:
! 390: /**
! 391: * util to remove white characters from input string
! 392: *
! 393: * @param string
! 394: * @return stripped string
! 395: */
! 396: String strip(String str)
! 397: {
! 398: JSArray A = new JSArray();
! 399:
! 400: A.split(str, "\n");
! 401: str = A.join("");
! 402: A.split(str, " ");
! 403: str = A.join("");
! 404: A.split(str, "\t");
! 405: str = A.join("");
! 406:
! 407: return str;
! 408: }
! 409:
! 410: /**
! 411: * util to replace white characters in input string
! 412: *
! 413: * @param string
! 414: * @return normalized string
! 415: */
! 416: String normalize(String str)
! 417: {
! 418: JSArray A = new JSArray();
! 419:
! 420: A.split(str, "\n");
! 421: str = A.join(" ");
! 422: A.split(str, "\t");
! 423: str = A.join(" ");
! 424:
! 425: return str;
! 426: }
! 427:
! 428: /**
! 429: * util to replace internal entities in input string
! 430: *
! 431: * @param string
! 432: * @return string with replaced entitities
! 433: */
! 434: String entity(String str)
! 435: {
! 436: if(!changeEntities) return str;//by rogo
! 437: JSArray A = new JSArray();
! 438:
! 439: A.split(str, "<");
! 440: str = A.join("<");
! 441: A.split(str, ">");
! 442: str = A.join(">");
! 443: A.split(str, """);
! 444: str = A.join("\"");
! 445: A.split(str, "'");
! 446: str = A.join("\'");
! 447: A.split(str, "&");
! 448: str = A.join("&");
! 449:
! 450: return str;
! 451: }
! 452:
! 453: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>