annotate src/main/java/org/mpi/openmind/repository/utils/OM4XmlEventReader.java @ 31:7d8ebe8ac8a2

create reader and check script for XML dumps.
author casties
date Wed, 24 Aug 2016 19:12:24 +0200
parents
children 9c54842f5e86
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
1 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
2 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
3 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
4 package org.mpi.openmind.repository.utils;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
5
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
6 import java.io.InputStream;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
7 import java.util.ArrayList;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
8 import java.util.HashMap;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
9 import java.util.Iterator;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
10 import java.util.List;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
11 import java.util.Map;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
12
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
13 import javax.xml.namespace.QName;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
14 import javax.xml.stream.XMLEventReader;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
15 import javax.xml.stream.XMLInputFactory;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
16 import javax.xml.stream.XMLStreamException;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
17 import javax.xml.stream.events.Attribute;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
18 import javax.xml.stream.events.Characters;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
19 import javax.xml.stream.events.EndElement;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
20 import javax.xml.stream.events.StartElement;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
21 import javax.xml.stream.events.XMLEvent;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
22
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
23 import org.apache.log4j.Logger;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
24
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
25 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
26 * Class that reads an OM4 XML dump into lists of simple objects.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
27 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
28 * The constructor takes an InputStream.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
29 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
30 * The read() method reads the contents of the file into the members
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
31 * .entities and .relations.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
32 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
33 * The contents are Lists of OmEntities and OmRelations also holding
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
34 * Lists of omAttributes.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
35 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
36 * @author casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
37 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
38 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
39 public class OM4XmlEventReader {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
40
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
41 private static Logger logger = Logger.getLogger(OM4XmlEventReader.class);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
42
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
/**
 * Creates a reader for the given XML dump.
 *
 * Nothing is parsed here; the stream is only stored and is consumed
 * later by read().
 *
 * @param xmlStream stream to read the OM4 XML dump from
 */
public OM4XmlEventReader(InputStream xmlStream) {
    this.xmlStream = xmlStream;
}
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
47
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
// Stream the XML dump is read from; assigned in the constructor and consumed by read().
48 InputStream xmlStream;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
49
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
// Value of the "number" attribute of the entities element; set by processEntities() only when that attribute is present.
50 public int numEntities;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
// Entities parsed from the dump; assigned by read() when an entities element is found, null before that.
51 public List<OmEntity> entities;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
// Running count of entities parsed; incremented in processEntity() and used for progress logging.
52 private int entCnt = 0;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
53
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
// Value of the "number" attribute of the relations element; set by processRelations() only when that attribute is present.
54 public int numRelations;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
// Relations parsed from the dump; assigned by read() when a relations element is found, null before that.
55 public List<OmRelation> relations;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
// NOTE(review): presumably the relation counterpart of entCnt; its use is not visible in this part of the file - confirm.
56 private int relCnt = 0;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
57
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
58 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
59 * Simple class holding the representation of an OpenMind Attribute from XML.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
60 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
61 * @author casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
62 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
63 public class OmAttribute {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
64 public Map<String, String> xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
65 public String value;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
66
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
67 public String getId() {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
68 return xmlAtts.get("id");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
69 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
70 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
71
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
72 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
73 * Simple class holding the representation of an OpenMind Entity from XML.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
74 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
75 * @author casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
76 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
77 public class OmEntity {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
78 public Map<String, String> xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
79 public String value;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
80 public List<OmAttribute> attributes;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
81
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
82 public String getId() {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
83 return xmlAtts.get("id");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
84 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
85 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
86
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
87 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
88 * Simple class holding the representation of an OpenMind Relation from XML.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
89 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
90 * @author casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
91 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
92 public class OmRelation {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
93 public Map<String, String> xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
94 public String value;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
95 public List<OmAttribute> attributes;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
96
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
97 public String getId() {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
98 return xmlAtts.get("id");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
99 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
100 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
101
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
102 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
103 * Reads the XML from xmlStream and populates entities and relations.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
104 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
105 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
106 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
107 public void read() throws XMLStreamException {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
108 XMLInputFactory inputFactory = XMLInputFactory.newInstance();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
109 XMLEventReader reader = inputFactory.createXMLEventReader(xmlStream, "UTF-8");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
110 try {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
111 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
112 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
113 if (e.isStartDocument()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
114 continue;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
115 } else if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
116 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
117 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
118 if (lname == XMLUtil.ENTITIES) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
119 entities = processEntities(es, reader);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
120 } else if (lname == XMLUtil.RELATIONS) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
121 relations = processRelations(es, reader);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
122 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
123 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
124 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
125 } finally {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
126 reader.close();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
127 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
128 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
129
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
130 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
131 * Process the entities tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
132 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
133 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
134 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
135 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
136 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
137 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
138 private List<OmEntity> processEntities(StartElement elem, XMLEventReader reader) throws XMLStreamException {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
139 logger.debug("loading entities...");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
140 // get number attribute
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
141 Attribute numa = elem.getAttributeByName(new QName("number"));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
142 if (numa != null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
143 numEntities = Integer.parseInt(numa.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
144 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
145 // start reading sub-elements
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
146 List<OmEntity> entities = new ArrayList<OmEntity>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
147 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
148 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
149 if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
150 // start of next element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
151 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
152 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
153 if (lname == XMLUtil.ENTITY) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
154 // process entity tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
155 entities.add(processEntity(es, reader));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
156 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
157 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
158 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
159 if (ee.getName().getLocalPart().equals(XMLUtil.ENTITIES)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
160 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
161 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
162 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
163 logger.warn("Unexpected EndElement: "+ee);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
164 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
165 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
166 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
167 return entities;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
168 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
169
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
170 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
171 * Process the entity tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
172 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
173 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
174 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
175 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
176 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
177 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
178 private OmEntity processEntity(StartElement elem, XMLEventReader reader) throws XMLStreamException {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
179 //logger.debug("entity");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
180 OmEntity ent = new OmEntity();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
181 Map<String, String> xmlAtts = new HashMap<String, String>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
182 @SuppressWarnings("unchecked")
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
183 Iterator<Attribute> atts = elem.getAttributes();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
184 while (atts.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
185 Attribute att = atts.next();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
186 xmlAtts.put(att.getName().getLocalPart(), att.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
187 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
188 ent.xmlAtts = xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
189 // start reading sub-elements
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
190 ent.attributes = new ArrayList<OmAttribute>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
191 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
192 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
193 if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
194 // start of next element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
195 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
196 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
197 if (lname == XMLUtil.ATTRIBUTES) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
198 // ignore attributes tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
199 continue;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
200 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
201 if (lname == XMLUtil.ATTRIBUTE) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
202 // process attribute tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
203 ent.attributes.add(processAttribute(es, reader));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
204 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
205 } else if (e.isCharacters()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
206 // text content
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
207 Characters ec = e.asCharacters();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
208 if (ent.value == null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
209 ent.value = ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
210 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
211 ent.value += ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
212 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
213 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
214 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
215 if (ee.getName().getLocalPart().equals(XMLUtil.ENTITY)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
216 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
217 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
218 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
219 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
220 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
221 if (++entCnt % 500 == 0) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
222 logger.debug(""+entCnt+" entities read...");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
223 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
224 return ent;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
225 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
226
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
227 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
228 * Process the relations tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
229 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
230 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
231 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
232 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
233 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
234 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
235 private List<OmRelation> processRelations(StartElement elem, XMLEventReader reader) throws XMLStreamException {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
236 logger.debug("loading relations...");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
237 // get number attribute
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
238 Attribute numa = elem.getAttributeByName(new QName("number"));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
239 if (numa != null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
240 numRelations = Integer.parseInt(numa.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
241 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
242 // start reading sub-elements
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
243 List<OmRelation> rels = new ArrayList<OmRelation>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
244 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
245 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
246 if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
247 // start of next element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
248 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
249 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
250 if (lname == XMLUtil.RELATION) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
251 // process entity tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
252 rels.add(processRelation(es, reader));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
253 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
254 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
255 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
256 if (ee.getName().getLocalPart().equals(XMLUtil.RELATIONS)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
257 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
258 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
259 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
260 logger.warn("Unexpected EndElement: "+ee);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
261 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
262 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
263 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
264 return rels;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
265 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
266
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
267
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
268 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
269 * Process the relation tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
270 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
271 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
272 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
273 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
274 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
275 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
276 private OmRelation processRelation(StartElement elem, XMLEventReader reader) throws XMLStreamException {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
277 //logger.debug("relation");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
278 OmRelation rel = new OmRelation();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
279 Map<String, String> xmlAtts = new HashMap<String, String>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
280 @SuppressWarnings("unchecked")
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
281 Iterator<Attribute> atts = elem.getAttributes();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
282 while (atts.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
283 Attribute att = atts.next();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
284 xmlAtts.put(att.getName().getLocalPart(), att.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
285 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
286 rel.xmlAtts = xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
287 // start reading sub-elements
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
288 rel.attributes = new ArrayList<OmAttribute>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
289 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
290 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
291 if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
292 // start of next element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
293 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
294 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
295 if (lname == XMLUtil.ATTRIBUTES) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
296 // ignore attributes tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
297 continue;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
298 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
299 if (lname == XMLUtil.ATTRIBUTE) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
300 // process attribute tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
301 rel.attributes.add(processAttribute(es, reader));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
302 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
303 } else if (e.isCharacters()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
304 // text content
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
305 Characters ec = e.asCharacters();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
306 if (rel.value == null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
307 rel.value = ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
308 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
309 rel.value += ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
310 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
311 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
312 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
313 if (ee.getName().getLocalPart().equals(XMLUtil.RELATION)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
314 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
315 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
316 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
317 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
318 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
319 if (++relCnt % 100 == 0) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
320 logger.debug(""+relCnt+" relations read...");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
321 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
322 return rel;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
323 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
324
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
325 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
326 * Process the attribute tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
327 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
328 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
329 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
330 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
331 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
332 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
333 private OmAttribute processAttribute(StartElement elem, XMLEventReader reader) throws XMLStreamException {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
334 //logger.debug("attribute");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
335 OmAttribute oma = new OmAttribute();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
336 Map<String, String> xmlAtts = new HashMap<String, String>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
337 @SuppressWarnings("unchecked")
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
338 Iterator<Attribute> atts = elem.getAttributes();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
339 while (atts.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
340 Attribute att = atts.next();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
341 xmlAtts.put(att.getName().getLocalPart(), att.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
342 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
343 oma.xmlAtts = xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
344 // start reading sub-elements
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
345 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
346 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
347 if (e.isCharacters()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
348 // text content
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
349 Characters ec = e.asCharacters();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
350 if (oma.value == null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
351 oma.value = ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
352 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
353 oma.value += ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
354 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
355 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
356 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
357 if (ee.getName().getLocalPart().equals(XMLUtil.ATTRIBUTE)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
358 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
359 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
360 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
361 logger.warn("Unexpected EndElement: "+ee);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
362 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
363 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
364 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
365 return oma;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
366 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
367
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
368 }