annotate src/main/java/org/mpi/openmind/repository/utils/OM4XmlEventReader.java @ 32:9c54842f5e86

better names for XML importer sub-classes.
author casties
date Thu, 25 Aug 2016 11:29:47 +0200
parents 7d8ebe8ac8a2
children 90f9a1c45b15
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
1 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
2 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
3 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
4 package org.mpi.openmind.repository.utils;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
5
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
6 import java.io.InputStream;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
7 import java.util.ArrayList;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
8 import java.util.HashMap;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
9 import java.util.Iterator;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
10 import java.util.List;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
11 import java.util.Map;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
12
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
13 import javax.xml.namespace.QName;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
14 import javax.xml.stream.XMLEventReader;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
15 import javax.xml.stream.XMLInputFactory;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
16 import javax.xml.stream.XMLStreamException;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
17 import javax.xml.stream.events.Attribute;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
18 import javax.xml.stream.events.Characters;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
19 import javax.xml.stream.events.EndElement;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
20 import javax.xml.stream.events.StartElement;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
21 import javax.xml.stream.events.XMLEvent;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
22
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
23 import org.apache.log4j.Logger;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
24
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
25 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
26 * Class that reads an OM4 XML dump into lists of simple objects.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
27 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
28 * The constructor takes an InputStream.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
29 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
30 * The read() method reads the contents of the file into the members
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
31 * .entities and .relations.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
32 *
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
33 * The contents are Lists of OmXmlEntities and OmXmlRelations holding
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
34 * Lists of OmXmlAttributes.
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
35 *
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
36 * This implementation uses XMLEventReader.
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
37 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
38 * @author casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
39 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
40 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
41 public class OM4XmlEventReader {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
42
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
43 private static Logger logger = Logger.getLogger(OM4XmlEventReader.class);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
44
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
/**
 * Creates a reader for the given XML stream.
 *
 * The stream is only stored here; nothing is read from it until
 * read() is called.
 *
 * @param xmlStream stream containing the XML dump
 */
public OM4XmlEventReader(InputStream xmlStream) {
    // the implicit superclass constructor call is all that is needed
    this.xmlStream = xmlStream;
}
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
49
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
50 InputStream xmlStream;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
51
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
52 public int numEntities;
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
53 public List<OmXmlEntity> entities;
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
54 private int entCnt = 0;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
55
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
56 public int numRelations;
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
57 public List<OmXmlRelation> relations;
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
58 private int relCnt = 0;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
59
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
60 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
61 * Simple class holding the representation of an OpenMind Attribute from XML.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
62 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
63 * @author casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
64 */
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
65 public class OmXmlAttribute {
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
66 public Map<String, String> xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
67 public String value;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
68
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
69 public String getId() {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
70 return xmlAtts.get("id");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
71 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
72 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
73
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
74 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
75 * Simple class holding the representation of an OpenMind Entity from XML.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
76 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
77 * @author casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
78 */
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
79 public class OmXmlEntity {
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
80 public Map<String, String> xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
81 public String value;
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
82 public List<OmXmlAttribute> attributes;
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
83
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
84 public String getId() {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
85 return xmlAtts.get("id");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
86 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
87 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
88
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
89 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
90 * Simple class holding the representation of an OpenMind Relation from XML.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
91 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
92 * @author casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
93 */
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
94 public class OmXmlRelation {
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
95 public Map<String, String> xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
96 public String value;
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
97 public List<OmXmlAttribute> attributes;
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
98
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
99 public String getId() {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
100 return xmlAtts.get("id");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
101 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
102 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
103
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
104 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
105 * Reads the XML from xmlStream and populates entities and relations.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
106 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
107 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
108 */
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
109 public void read() throws XMLStreamException {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
110 XMLInputFactory inputFactory = XMLInputFactory.newInstance();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
111 XMLEventReader reader = inputFactory.createXMLEventReader(xmlStream, "UTF-8");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
112 try {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
113 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
114 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
115 if (e.isStartDocument()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
116 continue;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
117 } else if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
118 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
119 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
120 if (lname == XMLUtil.ENTITIES) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
121 entities = processEntities(es, reader);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
122 } else if (lname == XMLUtil.RELATIONS) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
123 relations = processRelations(es, reader);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
124 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
125 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
126 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
127 } finally {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
128 reader.close();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
129 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
130 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
131
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
132 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
133 * Process the entities tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
134 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
135 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
136 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
137 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
138 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
139 */
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
140 private List<OmXmlEntity> processEntities(StartElement elem, XMLEventReader reader) throws XMLStreamException {
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
141 logger.debug("loading entities...");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
142 // get number attribute
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
143 Attribute numa = elem.getAttributeByName(new QName("number"));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
144 if (numa != null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
145 numEntities = Integer.parseInt(numa.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
146 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
147 // start reading sub-elements
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
148 List<OmXmlEntity> entities = new ArrayList<OmXmlEntity>();
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
149 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
150 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
151 if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
152 // start of next element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
153 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
154 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
155 if (lname == XMLUtil.ENTITY) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
156 // process entity tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
157 entities.add(processEntity(es, reader));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
158 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
159 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
160 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
161 if (ee.getName().getLocalPart().equals(XMLUtil.ENTITIES)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
162 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
163 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
164 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
165 logger.warn("Unexpected EndElement: "+ee);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
166 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
167 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
168 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
169 return entities;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
170 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
171
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
172 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
173 * Process the entity tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
174 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
175 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
176 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
177 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
178 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
179 */
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
180 private OmXmlEntity processEntity(StartElement elem, XMLEventReader reader) throws XMLStreamException {
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
181 //logger.debug("entity");
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
182 OmXmlEntity ent = new OmXmlEntity();
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
183 Map<String, String> xmlAtts = new HashMap<String, String>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
184 @SuppressWarnings("unchecked")
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
185 Iterator<Attribute> atts = elem.getAttributes();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
186 while (atts.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
187 Attribute att = atts.next();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
188 xmlAtts.put(att.getName().getLocalPart(), att.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
189 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
190 ent.xmlAtts = xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
191 // start reading sub-elements
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
192 ent.attributes = new ArrayList<OmXmlAttribute>();
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
193 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
194 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
195 if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
196 // start of next element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
197 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
198 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
199 if (lname == XMLUtil.ATTRIBUTES) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
200 // ignore attributes tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
201 continue;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
202 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
203 if (lname == XMLUtil.ATTRIBUTE) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
204 // process attribute tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
205 ent.attributes.add(processAttribute(es, reader));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
206 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
207 } else if (e.isCharacters()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
208 // text content
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
209 Characters ec = e.asCharacters();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
210 if (ent.value == null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
211 ent.value = ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
212 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
213 ent.value += ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
214 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
215 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
216 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
217 if (ee.getName().getLocalPart().equals(XMLUtil.ENTITY)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
218 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
219 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
220 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
221 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
222 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
223 if (++entCnt % 500 == 0) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
224 logger.debug(""+entCnt+" entities read...");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
225 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
226 return ent;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
227 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
228
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
229 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
230 * Process the relations tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
231 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
232 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
233 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
234 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
235 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
236 */
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
237 private List<OmXmlRelation> processRelations(StartElement elem, XMLEventReader reader) throws XMLStreamException {
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
238 logger.debug("loading relations...");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
239 // get number attribute
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
240 Attribute numa = elem.getAttributeByName(new QName("number"));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
241 if (numa != null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
242 numRelations = Integer.parseInt(numa.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
243 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
244 // start reading sub-elements
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
245 List<OmXmlRelation> rels = new ArrayList<OmXmlRelation>();
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
246 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
247 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
248 if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
249 // start of next element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
250 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
251 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
252 if (lname == XMLUtil.RELATION) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
253 // process entity tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
254 rels.add(processRelation(es, reader));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
255 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
256 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
257 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
258 if (ee.getName().getLocalPart().equals(XMLUtil.RELATIONS)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
259 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
260 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
261 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
262 logger.warn("Unexpected EndElement: "+ee);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
263 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
264 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
265 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
266 return rels;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
267 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
268
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
269
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
270 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
271 * Process the relation tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
272 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
273 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
274 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
275 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
276 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
277 */
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
278 private OmXmlRelation processRelation(StartElement elem, XMLEventReader reader) throws XMLStreamException {
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
279 //logger.debug("relation");
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
280 OmXmlRelation rel = new OmXmlRelation();
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
281 Map<String, String> xmlAtts = new HashMap<String, String>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
282 @SuppressWarnings("unchecked")
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
283 Iterator<Attribute> atts = elem.getAttributes();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
284 while (atts.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
285 Attribute att = atts.next();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
286 xmlAtts.put(att.getName().getLocalPart(), att.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
287 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
288 rel.xmlAtts = xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
289 // start reading sub-elements
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
290 rel.attributes = new ArrayList<OmXmlAttribute>();
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
291 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
292 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
293 if (e.isStartElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
294 // start of next element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
295 StartElement es = e.asStartElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
296 String lname = es.getName().getLocalPart();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
297 if (lname == XMLUtil.ATTRIBUTES) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
298 // ignore attributes tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
299 continue;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
300 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
301 if (lname == XMLUtil.ATTRIBUTE) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
302 // process attribute tag
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
303 rel.attributes.add(processAttribute(es, reader));
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
304 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
305 } else if (e.isCharacters()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
306 // text content
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
307 Characters ec = e.asCharacters();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
308 if (rel.value == null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
309 rel.value = ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
310 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
311 rel.value += ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
312 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
313 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
314 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
315 if (ee.getName().getLocalPart().equals(XMLUtil.RELATION)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
316 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
317 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
318 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
319 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
320 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
321 if (++relCnt % 100 == 0) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
322 logger.debug(""+relCnt+" relations read...");
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
323 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
324 return rel;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
325 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
326
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
327 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
328 * Process the attribute tag and its contents.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
329 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
330 * @param elem
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
331 * @param reader
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
332 * @return
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
333 * @throws XMLStreamException
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
334 */
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
335 private OmXmlAttribute processAttribute(StartElement elem, XMLEventReader reader) throws XMLStreamException {
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
336 //logger.debug("attribute");
32
9c54842f5e86 better names for XML importer sub-classes.
casties
parents: 31
diff changeset
337 OmXmlAttribute oma = new OmXmlAttribute();
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
338 Map<String, String> xmlAtts = new HashMap<String, String>();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
339 @SuppressWarnings("unchecked")
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
340 Iterator<Attribute> atts = elem.getAttributes();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
341 while (atts.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
342 Attribute att = atts.next();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
343 xmlAtts.put(att.getName().getLocalPart(), att.getValue());
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
344 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
345 oma.xmlAtts = xmlAtts;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
346 // start reading sub-elements
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
347 while (reader.hasNext()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
348 XMLEvent e = reader.nextEvent();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
349 if (e.isCharacters()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
350 // text content
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
351 Characters ec = e.asCharacters();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
352 if (oma.value == null) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
353 oma.value = ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
354 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
355 oma.value += ec.getData();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
356 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
357 } else if (e.isEndElement()) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
358 EndElement ee = e.asEndElement();
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
359 if (ee.getName().getLocalPart().equals(XMLUtil.ATTRIBUTE)) {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
360 // end of this element
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
361 break;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
362 } else {
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
363 logger.warn("Unexpected EndElement: "+ee);
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
364 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
365 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
366 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
367 return oma;
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
368 }
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
369
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents:
diff changeset
370 }