Annotation of FM2SQL/com/exploringxml/xml/Xparse.java, revision 1.1.1.1
1.1 rogo 1: /* Copyright (c) 2000 Michael Claßen <mclassen@internet.com>
2: *
3: * This program is free software; you can redistribute it and/or
4: * modify it under the terms of the GNU General Public License
5: * as published by the Free Software Foundation; either version 2
6: * of the License, or (at your option) any later version.
7: *
8: * This program is distributed in the hope that it will be useful,
9: * but WITHOUT ANY WARRANTY; without even the implied warranty of
10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11: * GNU General Public License for more details.
12: *
13: * You should have received a copy of the GNU General Public License
14: * along with this program; if not, write to the Free Software
15: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16: *
17: * $Id: Xparse.java,v 1.1 2000/04/03 07:43:37 classen Exp $
18: */
19:
20: // Derived from Javascript version:
21:
22: // Ver .91 Feb 21 1998
23: //////////////////////////////////////////////////////////////
24: //
25: // Copyright 1998 Jeremie
26: // Free for public non-commercial use and modification
27: // as long as this header is kept intact and unmodified.
28: // Please see http://www.jeremie.com for more information
29: // or email jer@jeremie.com with questions/suggestions.
30: //
31: ///////////////////////////////////////////////////////////////
32: ///////////////////////////////////////////////////////////////
33: ////////// Simple XML Processing Library //////////////////////
34: ///////////////////////////////////////////////////////////////
35: ///////////////////////////////////////////////////////////////
36: //// Fully complies to the XML 1.0 spec
37: //// as a well-formed processor, with the
38: //// exception of full error reporting and
39: //// the document type declaration(and it's
40: //// related features, internal entities, etc).
41: ///////////////////////////////////////////////////////////////
42:
43: package com.exploringxml.xml;
44:
45: import java.util.Hashtable;
46:
47: /**
48: * Simple XML parser derived from the XParse Javascript parser;
49: * Please see http://www.jeremie.com for more information on this.
50: * Quoting Jeremie:
51: * "Fully complies to the XML 1.0 spec
52: * as a well-formed processor, with the
53: * exception of full error reporting and
54: * the document type declaration(and it's
55: * related features, internal entities, etc)."
56: *
57: * @author Michael Claßen
58: * @version $Revision: 1.1 $
59: */
60: public class Xparse {
61:
62: /**
63: * Helper function for matching Javascript's definition
64: * of the substring function to not cause an IndexOutOfBoundsException
65: * when length exceeds string length but return the remainder
66: * of the string instead MC20001214
67: *
68: * @param s the string to slice
69: * @param start the starting position within s
70: * @param length the number of characters to slice
71: * @return the substring
72: */
73: private String substring(String s, int start, int length) {
74: if (s.length() > start + length)
75: return s.substring(start, length);
76: else
77: return s.substring(start);
78: }
79:
80: /** an internal fragment that is passed between functions
81: */
82: class Frag {
83: public String str;
84: public JSArray ary;
85: public String end;
86: public Frag()
87: {
88: this.str = new String();
89: this.ary = new JSArray();
90: this.end = new String();
91: }
92: }
93:
94: // global vars to track element UID's for the index
95: static int count = 0;
96: static JSArray index = new JSArray();
97: public boolean changeEntities=false;// by rogo
98: /**
99: * Main public function that is called to
100: * parse the XML string and return a root element object
101: *
102: * @param src the object's index in the array
103: * @return the parsed XML's root Node
104: */
105: public Node parse(String src) {
106:
107: count = 0;
108: index = new JSArray();
109:
110: Frag frag = new Frag();
111:
112: // remove bad \r characters and the prolog
113: frag.str = prolog(src);
114:
115: // create a root element to contain the document
116: Node root = Node.createRootelement();
117: root.name="ROOT";
118:
119: // main recursive function to process the xml
120: frag = compile(frag);
121:
122: // all done, lets return the root element + index + document
123: root.contents = frag.ary;
124: root.index = index;
125: index = new JSArray();
126: return root;
127: }
128:
129: /**
130: * transforms raw text input into a multilevel JSArray
131: *
132: * @param frag the input fragment
133: * @return the output fragment
134: */
135: Frag compile(Frag frag) {
136:
137: // keep circling and eating the str
138: while(true)
139: {
140: // when the str is empty, return the fragment
141: if(frag.str.length() == 0)
142: {
143: return frag;
144: }
145:
146: int TagStart = frag.str.indexOf("<");
147:
148: if(TagStart != 0)
149: {
150: // theres a chunk of characters here, store it and go on
151: int thisary = frag.ary.length();
152: frag.ary.setElementAt(Node.createChardata(), thisary);
153: if(TagStart == -1)
154: {
155: frag.ary.setElementAt(entity(frag.str), thisary, JSArray.Value);
156: frag.str = "";
157: }
158: else
159: {
160: frag.ary.setElementAt(entity(substring(frag.str,0,TagStart)), thisary, JSArray.Value);
161: frag.str = substring(frag.str,TagStart,frag.str.length());
162: }
163: }
164: else
165: {
166: // determine what the next section is, and process it
167: if(substring(frag.str,1,2).equals("?"))
168: {
169: frag = tagPI(frag);
170: }
171: else
172: {
173: if(substring(frag.str,1,4).equals("!--"))
174: {
175: frag = tagComment(frag);
176: }
177: else
178: {
179: if(substring(frag.str,1,9).equals("![CDATA["))
180: {
181: frag = tagCData(frag);
182: }
183: else
184: {
185: if(substring(frag.str,1,frag.end.length() + 3).equals("/" + frag.end + ">") || strip(substring(frag.str,1,frag.end.length() + 3)).equals("/" + frag.end))
186: {
187: // found the end of the current tag, end the recursive process and return
188: frag.str = substring(frag.str,frag.end.length() + 3,frag.str.length());
189: frag.end = "";
190: return frag;
191: }
192: else
193: {
194: frag = tagElement(frag);
195: }
196: }
197: }
198: }
199:
200: }
201: }
202: //MC return "";
203: }
204:
205: //// functions to process different tags
206:
207: /**
208: * process an XML element
209: *
210: * @param frag the input fragment
211: * @return the output fragment
212: */
213: Frag tagElement(Frag frag)
214: {
215: // initialize some temporary variables for manipulating the tag
216: int close = frag.str.indexOf(">");
217: boolean empty = (substring(frag.str,close - 1, close).equals("/"));
218: if(empty)
219: {
220: close -= 1;
221: }
222:
223: // split up the name and attributes
224: String starttag = normalize(substring(frag.str,1,close));
225: int nextspace = starttag.indexOf(" ");
226: String attribs = new String();
227: String name = new String();
228: if(nextspace != -1)
229: {
230: name = starttag.substring(0,nextspace);
231: attribs = starttag.substring(nextspace + 1,starttag.length());
232: }
233: else
234: {
235: name = starttag;
236: }
237:
238: int thisary = frag.ary.length();
239: frag.ary.setElementAt(Node.createElement(), thisary);
240: frag.ary.setElementAt(strip(name), thisary, JSArray.Name);
241: if(attribs.length() > 0)
242: {
243: frag.ary.setElementAt(attribution(attribs), thisary, JSArray.Attributes);
244: }
245: if(!empty)
246: {
247: // !!!! important,
248: // take the contents of the tag and parse them
249: Frag contents = new Frag();
250: contents.str = substring(frag.str,close + 1,frag.str.length());
251: contents.end = name;
252: contents = compile(contents);
253: frag.ary.setElementAt(contents.ary, thisary, JSArray.Contents);
254: frag.str = contents.str;
255: }
256: else
257: {
258: frag.str = substring(frag.str,close + 2,frag.str.length());
259: }
260: return frag;
261: }
262:
263: /**
264: * process an XML processing instruction (PI)
265: *
266: * @param frag the input fragment
267: * @return the output fragment
268: */
269: Frag tagPI(Frag frag)
270: {
271: int close = frag.str.indexOf("?>");
272: String val = substring(frag.str,2, close);
273: int thisary = frag.ary.length();
274: frag.ary.setElementAt(Node.createPi(), thisary);
275: frag.ary.setElementAt(val, thisary, JSArray.Value);
276: frag.str = substring(frag.str,close + 2, frag.str.length());
277: return frag;
278: }
279:
280: /**
281: * process an XML comment
282: *
283: * @param frag the input fragment
284: * @return the output fragment
285: */
286: Frag tagComment(Frag frag)
287: {
288: int close = frag.str.indexOf("-->");
289: String val = substring(frag.str,4, close);
290: int thisary = frag.ary.length();
291: frag.ary.setElementAt(Node.createComment(), thisary);
292: frag.ary.setElementAt(val, thisary, JSArray.Value);
293: frag.str = substring(frag.str,close + 3, frag.str.length());
294: return frag;
295: }
296:
297: /**
298: * process XML character data (CDATA)
299: *
300: * @param frag the input fragment
301: * @return the output fragment
302: */
303: Frag tagCData(Frag frag)
304: {
305: int close = frag.str.indexOf("]]>");
306: String val = substring(frag.str,9, close);
307: int thisary = frag.ary.length();
308: frag.ary.setElementAt(Node.createChardata(), thisary);
309: frag.ary.setElementAt(val, thisary, JSArray.Value);
310: frag.str = substring(frag.str,close + 3, frag.str.length());
311: return frag;
312: }
313:
314: /**
315: * util for element attribute parsing
316: *
317: * @param attribute string
318: * @return an JSArray of all of the keys = values
319: */
320: Hashtable attribution(String str)
321: {
322: Hashtable all = new Hashtable();
323: while(true)
324: {
325: int eq = str.indexOf("=");
326: if(str.length() == 0 || eq == -1)
327: {
328: return all;
329: }
330:
331: int id1 = str.indexOf("\'");
332: int id2 = str.indexOf("\"");
333: int ids = 0; //MC = new Number();
334: String id = new String();
335: if((id1 < id2 && id1 != -1) || id2 == -1)
336: {
337: ids = id1;
338: id = "\'";
339: }
340: if((id2 < id1 || id1 == -1) && id2 != -1)
341: {
342: ids = id2;
343: id = "\"";
344: }
345: int nextid = str.indexOf(id,ids + 1);
346: String val = str.substring(ids + 1,nextid);
347:
348: String name = strip(str.substring(0,eq));
349: all.put(name, entity(val));
350: str = str.substring(nextid + 1,str.length());
351: }
352: //MC return "";
353: }
354:
355: /**
356: * util to remove \r characters from input string
357: *
358: * @param attribute string
359: * @return the xml string without a prolog
360: */
361: String prolog(String str)
362: {
363: JSArray a = new JSArray();
364:
365: a.split(str, "\r\n");
366: str = a.join("\n");
367: a.split(str, "\r");
368: str = a.join("\n");
369:
370: int start = str.indexOf("<");
371: if(str.substring(start,start + 3).equals("<?x") || str.substring(start,start + 3).equals("<?X") )
372: {
373: int close = str.indexOf("?>");
374: str = str.substring(close + 2,str.length());
375: }
376: start = str.indexOf("<!DOCTYPE");
377: if(start != -1)
378: {
379: int close = str.indexOf(">",start) + 1;
380: int dp = str.indexOf("[",start);
381: if(dp < close && dp != -1)
382: {
383: close = str.indexOf("]>",start) + 2;
384: }
385: str = str.substring(close,str.length());
386: }
387: return str;
388: }
389:
390: /**
391: * util to remove white characters from input string
392: *
393: * @param string
394: * @return stripped string
395: */
396: String strip(String str)
397: {
398: JSArray A = new JSArray();
399:
400: A.split(str, "\n");
401: str = A.join("");
402: A.split(str, " ");
403: str = A.join("");
404: A.split(str, "\t");
405: str = A.join("");
406:
407: return str;
408: }
409:
410: /**
411: * util to replace white characters in input string
412: *
413: * @param string
414: * @return normalized string
415: */
416: String normalize(String str)
417: {
418: JSArray A = new JSArray();
419:
420: A.split(str, "\n");
421: str = A.join(" ");
422: A.split(str, "\t");
423: str = A.join(" ");
424:
425: return str;
426: }
427:
428: /**
429: * util to replace internal entities in input string
430: *
431: * @param string
432: * @return string with replaced entitities
433: */
434: String entity(String str)
435: {
436: if(!changeEntities) return str;//by rogo
437: JSArray A = new JSArray();
438:
439: A.split(str, "<");
440: str = A.join("<");
441: A.split(str, ">");
442: str = A.join(">");
443: A.split(str, """);
444: str = A.join("\"");
445: A.split(str, "'");
446: str = A.join("\'");
447: A.split(str, "&");
448: str = A.join("&");
449:
450: return str;
451: }
452:
453: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>