Mercurial > hg > fulltextSearchServer
comparison lib/org.json_2.0/src/org/json/XMLTokener.java @ 0:db87c1b7eb6d
initial
author | dwinter |
---|---|
date | Wed, 03 Nov 2010 12:18:46 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:db87c1b7eb6d |
---|---|
package org.json;

/*
Copyright (c) 2002 JSON.org

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

The Software shall be used for Good, not Evil.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
26 | |
27 /** | |
28 * The XMLTokener extends the JSONTokener to provide additional methods | |
29 * for the parsing of XML texts. | |
30 * @author JSON.org | |
31 * @version 2008-09-18 | |
32 */ | |
33 public class XMLTokener extends JSONTokener { | |
34 | |
35 | |
36 /** The table of entity values. It initially contains Character values for | |
37 * amp, apos, gt, lt, quot. | |
38 */ | |
39 public static final java.util.HashMap entity; | |
40 | |
41 static { | |
42 entity = new java.util.HashMap(8); | |
43 entity.put("amp", XML.AMP); | |
44 entity.put("apos", XML.APOS); | |
45 entity.put("gt", XML.GT); | |
46 entity.put("lt", XML.LT); | |
47 entity.put("quot", XML.QUOT); | |
48 } | |
49 | |
50 /** | |
51 * Construct an XMLTokener from a string. | |
52 * @param s A source string. | |
53 */ | |
54 public XMLTokener(String s) { | |
55 super(s); | |
56 } | |
57 | |
58 /** | |
59 * Get the text in the CDATA block. | |
60 * @return The string up to the <code>]]></code>. | |
61 * @throws JSONException If the <code>]]></code> is not found. | |
62 */ | |
63 public String nextCDATA() throws JSONException { | |
64 char c; | |
65 int i; | |
66 StringBuffer sb = new StringBuffer(); | |
67 for (;;) { | |
68 c = next(); | |
69 if (c == 0) { | |
70 throw syntaxError("Unclosed CDATA"); | |
71 } | |
72 sb.append(c); | |
73 i = sb.length() - 3; | |
74 if (i >= 0 && sb.charAt(i) == ']' && | |
75 sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') { | |
76 sb.setLength(i); | |
77 return sb.toString(); | |
78 } | |
79 } | |
80 } | |
81 | |
82 | |
83 /** | |
84 * Get the next XML outer token, trimming whitespace. There are two kinds | |
85 * of tokens: the '<' character which begins a markup tag, and the content | |
86 * text between markup tags. | |
87 * | |
88 * @return A string, or a '<' Character, or null if there is no more | |
89 * source text. | |
90 * @throws JSONException | |
91 */ | |
92 public Object nextContent() throws JSONException { | |
93 char c; | |
94 StringBuffer sb; | |
95 do { | |
96 c = next(); | |
97 } while (Character.isWhitespace(c)); | |
98 if (c == 0) { | |
99 return null; | |
100 } | |
101 if (c == '<') { | |
102 return XML.LT; | |
103 } | |
104 sb = new StringBuffer(); | |
105 for (;;) { | |
106 if (c == '<' || c == 0) { | |
107 back(); | |
108 return sb.toString().trim(); | |
109 } | |
110 if (c == '&') { | |
111 sb.append(nextEntity(c)); | |
112 } else { | |
113 sb.append(c); | |
114 } | |
115 c = next(); | |
116 } | |
117 } | |
118 | |
119 | |
120 /** | |
121 * Return the next entity. These entities are translated to Characters: | |
122 * <code>& ' > < "</code>. | |
123 * @param a An ampersand character. | |
124 * @return A Character or an entity String if the entity is not recognized. | |
125 * @throws JSONException If missing ';' in XML entity. | |
126 */ | |
127 public Object nextEntity(char a) throws JSONException { | |
128 StringBuffer sb = new StringBuffer(); | |
129 for (;;) { | |
130 char c = next(); | |
131 if (Character.isLetterOrDigit(c) || c == '#') { | |
132 sb.append(Character.toLowerCase(c)); | |
133 } else if (c == ';') { | |
134 break; | |
135 } else { | |
136 throw syntaxError("Missing ';' in XML entity: &" + sb); | |
137 } | |
138 } | |
139 String s = sb.toString(); | |
140 Object e = entity.get(s); | |
141 return e != null ? e : a + s + ";"; | |
142 } | |
143 | |
144 | |
145 /** | |
146 * Returns the next XML meta token. This is used for skipping over <!...> | |
147 * and <?...?> structures. | |
148 * @return Syntax characters (<code>< > / = ! ?</code>) are returned as | |
149 * Character, and strings and names are returned as Boolean. We don't care | |
150 * what the values actually are. | |
151 * @throws JSONException If a string is not properly closed or if the XML | |
152 * is badly structured. | |
153 */ | |
154 public Object nextMeta() throws JSONException { | |
155 char c; | |
156 char q; | |
157 do { | |
158 c = next(); | |
159 } while (Character.isWhitespace(c)); | |
160 switch (c) { | |
161 case 0: | |
162 throw syntaxError("Misshaped meta tag"); | |
163 case '<': | |
164 return XML.LT; | |
165 case '>': | |
166 return XML.GT; | |
167 case '/': | |
168 return XML.SLASH; | |
169 case '=': | |
170 return XML.EQ; | |
171 case '!': | |
172 return XML.BANG; | |
173 case '?': | |
174 return XML.QUEST; | |
175 case '"': | |
176 case '\'': | |
177 q = c; | |
178 for (;;) { | |
179 c = next(); | |
180 if (c == 0) { | |
181 throw syntaxError("Unterminated string"); | |
182 } | |
183 if (c == q) { | |
184 return Boolean.TRUE; | |
185 } | |
186 } | |
187 default: | |
188 for (;;) { | |
189 c = next(); | |
190 if (Character.isWhitespace(c)) { | |
191 return Boolean.TRUE; | |
192 } | |
193 switch (c) { | |
194 case 0: | |
195 case '<': | |
196 case '>': | |
197 case '/': | |
198 case '=': | |
199 case '!': | |
200 case '?': | |
201 case '"': | |
202 case '\'': | |
203 back(); | |
204 return Boolean.TRUE; | |
205 } | |
206 } | |
207 } | |
208 } | |
209 | |
210 | |
211 /** | |
212 * Get the next XML Token. These tokens are found inside of angle | |
213 * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it | |
214 * may be a string wrapped in single quotes or double quotes, or it may be a | |
215 * name. | |
216 * @return a String or a Character. | |
217 * @throws JSONException If the XML is not well formed. | |
218 */ | |
219 public Object nextToken() throws JSONException { | |
220 char c; | |
221 char q; | |
222 StringBuffer sb; | |
223 do { | |
224 c = next(); | |
225 } while (Character.isWhitespace(c)); | |
226 switch (c) { | |
227 case 0: | |
228 throw syntaxError("Misshaped element"); | |
229 case '<': | |
230 throw syntaxError("Misplaced '<'"); | |
231 case '>': | |
232 return XML.GT; | |
233 case '/': | |
234 return XML.SLASH; | |
235 case '=': | |
236 return XML.EQ; | |
237 case '!': | |
238 return XML.BANG; | |
239 case '?': | |
240 return XML.QUEST; | |
241 | |
242 // Quoted string | |
243 | |
244 case '"': | |
245 case '\'': | |
246 q = c; | |
247 sb = new StringBuffer(); | |
248 for (;;) { | |
249 c = next(); | |
250 if (c == 0) { | |
251 throw syntaxError("Unterminated string"); | |
252 } | |
253 if (c == q) { | |
254 return sb.toString(); | |
255 } | |
256 if (c == '&') { | |
257 sb.append(nextEntity(c)); | |
258 } else { | |
259 sb.append(c); | |
260 } | |
261 } | |
262 default: | |
263 | |
264 // Name | |
265 | |
266 sb = new StringBuffer(); | |
267 for (;;) { | |
268 sb.append(c); | |
269 c = next(); | |
270 if (Character.isWhitespace(c)) { | |
271 return sb.toString(); | |
272 } | |
273 switch (c) { | |
274 case 0: | |
275 return sb.toString(); | |
276 case '>': | |
277 case '/': | |
278 case '=': | |
279 case '!': | |
280 case '?': | |
281 case '[': | |
282 case ']': | |
283 back(); | |
284 return sb.toString(); | |
285 case '<': | |
286 case '"': | |
287 case '\'': | |
288 throw syntaxError("Bad character in a name"); | |
289 } | |
290 } | |
291 } | |
292 } | |
293 | |
294 | |
295 /** | |
296 * Skip characters until past the requested string. | |
297 * If it is not found, we are left at the end of the source with a result of false. | |
298 * @param to A string to skip past. | |
299 * @throws JSONException | |
300 */ | |
301 public boolean skipPast(String to) throws JSONException { | |
302 boolean b; | |
303 char c; | |
304 int i; | |
305 int j; | |
306 int offset = 0; | |
307 int n = to.length(); | |
308 char[] circle = new char[n]; | |
309 | |
310 /* | |
311 * First fill the circle buffer with as many characters as are in the | |
312 * to string. If we reach an early end, bail. | |
313 */ | |
314 | |
315 for (i = 0; i < n; i += 1) { | |
316 c = next(); | |
317 if (c == 0) { | |
318 return false; | |
319 } | |
320 circle[i] = c; | |
321 } | |
322 /* | |
323 * We will loop, possibly for all of the remaining characters. | |
324 */ | |
325 for (;;) { | |
326 j = offset; | |
327 b = true; | |
328 /* | |
329 * Compare the circle buffer with the to string. | |
330 */ | |
331 for (i = 0; i < n; i += 1) { | |
332 if (circle[j] != to.charAt(i)) { | |
333 b = false; | |
334 break; | |
335 } | |
336 j += 1; | |
337 if (j >= n) { | |
338 j -= n; | |
339 } | |
340 } | |
341 /* | |
342 * If we exit the loop with b intact, then victory is ours. | |
343 */ | |
344 if (b) { | |
345 return true; | |
346 } | |
347 /* | |
348 * Get the next character. If there isn't one, then defeat is ours. | |
349 */ | |
350 c = next(); | |
351 if (c == 0) { | |
352 return false; | |
353 } | |
354 /* | |
355 * Shove the character in the circle buffer and advance the | |
356 * circle offset. The offset is mod n. | |
357 */ | |
358 circle[offset] = c; | |
359 offset += 1; | |
360 if (offset >= n) { | |
361 offset -= n; | |
362 } | |
363 } | |
364 } | |
365 } |