Mercurial > hg > openmind
annotate src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java @ 127:3e772f7f43e0 default tip
ismi-date with long month names in xml dump.
author | Robert Casties <casties@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 11 May 2023 18:15:45 +0200 |
parents | 7d1e61a6d91b |
children |
rev | line source |
---|---|
1 | 1 package org.mpi.openmind.repository.utils; |
2 | |
3 import java.io.FileOutputStream; | |
4 import java.io.IOException; | |
5 import java.io.OutputStreamWriter; | |
6 import java.text.DecimalFormat; | |
7 import java.util.ArrayList; | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
8 import java.util.HashMap; |
1 | 9 import java.util.List; |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
10 import java.util.Map; |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
11 import java.util.regex.Matcher; |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
12 import java.util.regex.Pattern; |
1 | 13 |
29 | 14 import javax.xml.stream.XMLOutputFactory; |
15 import javax.xml.stream.XMLStreamException; | |
16 import javax.xml.stream.XMLStreamWriter; | |
1 | 17 |
18 import org.apache.commons.lang.StringUtils; | |
19 import org.apache.log4j.Logger; | |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
20 import org.joda.time.DateTime; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
21 import org.joda.time.format.DateTimeFormatter; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
22 import org.joda.time.format.ISODateTimeFormat; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
23 import org.json.JSONException; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
24 import org.json.JSONObject; |
1 | 25 import org.mpi.openmind.repository.bo.Attribute; |
26 import org.mpi.openmind.repository.bo.Entity; | |
27 import org.mpi.openmind.repository.bo.Node; | |
28 import org.mpi.openmind.repository.bo.Relation; | |
29 import org.mpi.openmind.repository.services.PersistenceService; | |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
30 import org.mpi.openmind.repository.utils.ismi.ISMICalendar; |
1 | 31 |
31 | 32 /** |
33 * Export all entities and relations and definitions to XML. | |
34 * | |
75 | 35 * Saves (content) entities and relations (i.e. assertions) and definitions |
31 | 36 * (i.e. definition entities and relations) in separate files. |
37 * | |
38 * @author jurzua, casties | |
39 * | |
40 */ | |
1 | 41 public class OM4StreamWriter { |
42 | |
127
3e772f7f43e0
ismi-date with long month names in xml dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
126
diff
changeset
|
43 protected static final String FORMAT_VERSION = "4.15"; |
87
8005f7011975
update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
86
diff
changeset
|
44 |
29 | 45 private static Logger logger = Logger.getLogger(OM4StreamWriter.class); |
46 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
47 private static final int itemsPerPage = 500; |
77 | 48 |
49 /** Include normalized own-values. */ | |
78 | 50 public static boolean includeNormalizations = true; |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
51 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
52 /** key for entity count in attribute counts map */ |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
53 private static final String ENT_KEY = "<entity-count>"; |
29 | 54 |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
55 /** formatter for isodate tag */ |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
56 public static DateTimeFormatter isodateFormatter = ISODateTimeFormat.date(); |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
57 |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
58 /** pattern for bibid in endnote-id attribute */ |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
59 public static final Pattern bibidPattern = Pattern.compile("#(\\d+)"); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
60 |
29 | 61 /** |
62 * Return the object's string representation or "null" if its null. | |
63 * | |
64 * @param s | |
65 * @return | |
66 */ | |
31 | 67 private static String defaultString(Object s) { |
29 | 68 if (s == null) { |
69 return "null"; | |
70 } else { | |
71 return s.toString(); | |
72 } | |
73 } | |
74 | |
75 | |
75 | 76 /** |
77 * Saves all content Entities with their Attributes and Relations in a XML file with the given fileName. | |
78 * | |
79 * @param fileName | |
80 * @param ps | |
81 */ | |
29 | 82 public static void backupEntities(String fileName, PersistenceService ps) { |
78 | 83 writeEntsAndRels(fileName, ps, Node.TYPE_ABOX, includeNormalizations); |
29 | 84 } |
85 | |
75 | 86 /** |
87 * Saves all definitions in a XML file with the given fileName. | |
88 * | |
89 * @param fileName | |
90 * @param ps | |
91 */ | |
29 | 92 public static void backupDefinitions(String fileName, PersistenceService ps) { |
78 | 93 writeEntsAndRels(fileName, ps, Node.TYPE_TBOX, false); |
29 | 94 } |
95 | |
96 /** | |
97 * Writes all entities of the given type and their relations to the XML file at fileName. | |
98 * | |
75 | 99 * Type is either TYPE_TBOX or TYPE_ABOX. |
100 * | |
29 | 101 * @param fileName |
102 * @param ps | |
75 | 103 * @param type |
29 | 104 */ |
78 | 105 private static void writeEntsAndRels(String fileName, PersistenceService ps, String type, boolean includeNorm) { |
29 | 106 OutputStreamWriter out; |
107 try { | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
108 // statistics collection Maps |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
109 Map<String, Map<String, Long>> entStats = new HashMap<String, Map<String, Long>>(); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
110 Map<String, Map<String, Long>> relStats = new HashMap<String, Map<String, Long>>(); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
111 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
112 // setup xml writer |
29 | 113 FileOutputStream fileOut = new FileOutputStream(fileName); |
114 out = new OutputStreamWriter(fileOut, "UTF-8"); | |
115 XMLOutputFactory factory = XMLOutputFactory.newInstance(); | |
116 XMLStreamWriter writer = factory.createXMLStreamWriter(out); | |
117 | |
118 int entitiesCount = 0; | |
119 | |
120 writer.writeStartDocument("UTF-8", "1.0"); | |
121 | |
122 if (type.equals(Node.TYPE_ABOX)) { | |
123 writer.writeStartElement(XMLUtil.OPENMIND_DATA); | |
87
8005f7011975
update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
86
diff
changeset
|
124 writer.writeAttribute("version", FORMAT_VERSION); |
75 | 125 // get number of content Entities |
29 | 126 entitiesCount = ps.getEntityCount(null).intValue(); |
127 } else { | |
128 writer.writeStartElement(XMLUtil.META_DATA); | |
87
8005f7011975
update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
86
diff
changeset
|
129 writer.writeAttribute("version", FORMAT_VERSION); |
75 | 130 // get number of definition Entities |
29 | 131 entitiesCount = ps.getEntityCount(Node.TYPE_TBOX).intValue(); |
132 } | |
133 | |
134 int numberOfPages = entitiesCount / itemsPerPage; | |
135 // debug: int numberOfPages = 1; | |
136 int counter = 0; | |
137 long start = System.currentTimeMillis(); | |
138 DecimalFormat df = new DecimalFormat("#.##"); | |
1 | 139 |
75 | 140 // list of Relations (filled from Entities) |
29 | 141 List<Relation> relList = new ArrayList<Relation>(); |
142 | |
143 /* | |
144 * write entities | |
145 */ | |
146 writer.writeStartElement((type.equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITIONS : XMLUtil.ENTITIES); | |
82
90f9a1c45b15
small change to xml format.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
79
diff
changeset
|
147 writer.writeAttribute("count", Integer.toString(entitiesCount)); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
148 // iterate database by pages |
29 | 149 for (int currentPage = 0; currentPage <= numberOfPages; currentPage++) { |
150 int startRecord = currentPage * itemsPerPage; | |
151 List<Entity> entities; | |
152 | |
153 if (type.equals(Node.TYPE_ABOX)) { | |
75 | 154 // get page of content Entities |
29 | 155 entities = ps.getEntityPage(null, startRecord, itemsPerPage); |
156 } else { | |
75 | 157 // get page of definition Entities |
29 | 158 entities = ps.getEntityPage(Node.TYPE_TBOX, startRecord, itemsPerPage); |
159 } | |
160 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
161 // iterate entities |
29 | 162 for (Entity ent : entities) { |
75 | 163 // write entity to XML |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
164 writeEntity(ent, writer, ps, includeNorm, entStats); |
29 | 165 // add (source)relations to list |
119
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
166 List<Relation> srcRels = ent.getSourceRelations(); |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
167 relList.addAll(srcRels); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
168 // update stats for relations |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
169 Map<String, Long> entRelStats = entStats.get(ent.getObjectClass()); |
119
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
170 for (Relation rel: srcRels) { |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
171 // update source relations |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
172 updateRelStats(rel, true, entRelStats); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
173 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
174 for (Relation rel: ent.getTargetRelations()) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
175 // update target relations |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
176 updateRelStats(rel, false, entRelStats); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
177 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
178 // count entities |
29 | 179 counter++; |
180 } | |
181 | |
182 long runtime = System.currentTimeMillis() - start; | |
183 double percent = ((double) counter / (double) entitiesCount) * 100.0; | |
184 logger.debug("(" + df.format(percent) + "%) \t[" + counter + "/" + entitiesCount + "]\t"); | |
185 logger.debug("Speed[ents/s]: " + df.format((double) counter / ((double) runtime / 1000))); | |
186 writer.flush(); | |
187 } | |
188 writer.writeEndElement(); | |
189 | |
190 /* | |
191 * write relations (from list) | |
192 */ | |
193 writer.writeStartElement(XMLUtil.RELATIONS); | |
82
90f9a1c45b15
small change to xml format.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
79
diff
changeset
|
194 writer.writeAttribute("count", Integer.toString(relList.size())); |
29 | 195 for (Relation rel : relList) { |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
196 writeRelation(rel, writer, includeNorm, relStats); |
29 | 197 } |
198 writer.writeEndElement(); | |
199 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
200 /* |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
201 * write statistics |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
202 */ |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
203 // entity stats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
204 writeStats(XMLUtil.ENTITY_STATS, XMLUtil.ENTITY, entStats, writer); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
205 // relation stats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
206 writeStats(XMLUtil.RELATION_STATS, XMLUtil.RELATION, relStats, writer); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
207 |
29 | 208 // end file. |
209 writer.writeEndElement(); | |
210 | |
211 writer.flush(); | |
212 writer.close(); | |
213 | |
214 logger.info("END Stream Writer"); | |
215 } catch (IOException e) { | |
75 | 216 logger.error(e); |
29 | 217 } catch (XMLStreamException e) { |
75 | 218 logger.error(e); |
29 | 219 } |
220 } | |
1 | 221 |
29 | 222 /** |
223 * Write OpenMind relation to XML. | |
224 * | |
225 * @param rel | |
226 * @param writer | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
227 * @param relStats |
29 | 228 * @throws XMLStreamException |
229 */ | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
230 private static void writeRelation(Relation rel, XMLStreamWriter writer, boolean includeNorm, |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
231 Map<String, Map<String, Long>> relStats) throws XMLStreamException { |
29 | 232 writer.writeStartElement(XMLUtil.RELATION); |
233 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
234 // update stats |
100
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
235 Map<String, Long> attStats = null; |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
236 if (relStats != null) { |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
237 attStats = updateNodeStats(rel, relStats); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
238 } |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
239 |
29 | 240 /* |
241 * write XML-attributes | |
242 */ | |
243 writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(rel.getObjectClass())); | |
244 writer.writeAttribute(XMLUtil.ID, defaultString(rel.getId())); | |
245 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(rel.getRowId())); | |
75 | 246 if (StringUtils.isNotEmpty(rel.getContentType())) { |
29 | 247 writer.writeAttribute(XMLUtil.CONTENT_TYPE, rel.getContentType()); |
75 | 248 } |
29 | 249 writer.writeAttribute(XMLUtil.RELATION_SOURCE_ID, defaultString(rel.getSourceId())); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
250 writer.writeAttribute(XMLUtil.RELATION_SOURCE, defaultString(rel.getSourceObjectClass())); |
29 | 251 writer.writeAttribute(XMLUtil.RELATION_TARGET_ID, defaultString(rel.getTargetId())); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
252 writer.writeAttribute(XMLUtil.RELATION_TARGET, defaultString(rel.getTargetObjectClass())); |
29 | 253 writer.writeAttribute(XMLUtil.VERSION, defaultString(rel.getVersion())); |
254 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(rel.getModificationTime())); | |
75 | 255 if (rel.getUser() != null) { |
29 | 256 writer.writeAttribute(XMLUtil.USER, rel.getUser()); |
75 | 257 } |
258 if (rel.getIsPublic()) { | |
29 | 259 writer.writeAttribute(XMLUtil.PUBLIC, "true"); |
75 | 260 } |
29 | 261 |
262 /* | |
263 * write OpenMind attributes of this relation as XML tags | |
264 */ | |
265 if (rel.getAttributes().size() > 0) { | |
266 writer.writeStartElement(XMLUtil.ATTRIBUTES); | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
267 for (Attribute att : rel.getAttributes()) { |
100
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
268 if (attStats != null) { |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
269 // update stats |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
270 updateAttStats(att, attStats); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
271 } |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
272 // write xml |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
273 writeAttribute(att, writer, includeNorm); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
274 } |
29 | 275 writer.writeEndElement(); |
276 } | |
1 | 277 |
29 | 278 /* |
279 * write own value as content | |
280 */ | |
75 | 281 if (StringUtils.isNotEmpty(rel.getOwnValue())) { |
29 | 282 writer.writeCharacters(rel.getOwnValue()); |
75 | 283 } |
29 | 284 |
285 writer.writeEndElement(); | |
286 } | |
287 | |
288 /** | |
289 * Write OpenMind entity to XML. | |
290 * | |
291 * @param entity | |
292 * @param writer | |
293 * @param ps | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
294 * @param entStats |
29 | 295 * @throws XMLStreamException |
296 */ | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
297 private static void writeEntity(Entity entity, XMLStreamWriter writer, PersistenceService ps, boolean includeNorm, |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
298 Map<String, Map<String, Long>> entStats) |
29 | 299 throws XMLStreamException { |
1 | 300 |
29 | 301 writer.writeStartElement((entity.getType().equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITION : XMLUtil.ENTITY); |
302 | |
303 if (entity.isLightweight()) { | |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
304 // make sure we have all attributes and relations |
29 | 305 entity = ps.getEntityContent(entity); |
306 } | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
307 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
308 // update stats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
309 Map<String, Long> attStats = updateNodeStats(entity, entStats); |
29 | 310 |
311 /* | |
312 * write XML attributes | |
313 */ | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
314 writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(entity.getObjectClass())); |
29 | 315 writer.writeAttribute(XMLUtil.ID, defaultString(entity.getId())); |
316 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(entity.getRowId())); | |
75 | 317 if (StringUtils.isNotEmpty(entity.getContentType())) { |
29 | 318 writer.writeAttribute(XMLUtil.CONTENT_TYPE, entity.getContentType()); |
75 | 319 } |
29 | 320 writer.writeAttribute(XMLUtil.VERSION, defaultString(entity.getVersion())); |
321 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(entity.getModificationTime())); | |
75 | 322 if (entity.getUser() != null) { |
29 | 323 writer.writeAttribute(XMLUtil.USER, entity.getUser()); |
75 | 324 } |
325 if (entity.getIsPublic()) { | |
29 | 326 writer.writeAttribute(XMLUtil.PUBLIC, "true"); |
75 | 327 } |
120
3b0ce5e3302d
add Node status field to XML export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
119
diff
changeset
|
328 if (StringUtils.isNotEmpty(entity.getStatus())) { |
3b0ce5e3302d
add Node status field to XML export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
119
diff
changeset
|
329 writer.writeAttribute(XMLUtil.STATUS, entity.getStatus()); |
3b0ce5e3302d
add Node status field to XML export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
119
diff
changeset
|
330 } |
1 | 331 |
29 | 332 /* |
333 * write OpenMind attributes of this entity as XML tags | |
334 */ | |
335 if (entity.getAttributes().size() > 0) { | |
336 writer.writeStartElement(XMLUtil.ATTRIBUTES); | |
337 for (Attribute att : entity.getAttributes()) { | |
110
484be3266e54
omit empty attributes in XML dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
106
diff
changeset
|
338 // skip empty attributes |
484be3266e54
omit empty attributes in XML dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
106
diff
changeset
|
339 if (StringUtils.isEmpty(att.getValue())) continue; |
118
d275e1b99bce
remove is_autograph="no" values from XML dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
111
diff
changeset
|
340 // skip special attributes |
d275e1b99bce
remove is_autograph="no" values from XML dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
111
diff
changeset
|
341 if (att.getName().equals("is_autograph")) { |
d275e1b99bce
remove is_autograph="no" values from XML dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
111
diff
changeset
|
342 // skip non-yes values |
d275e1b99bce
remove is_autograph="no" values from XML dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
111
diff
changeset
|
343 if (!att.getValue().equals("yes")) continue; |
d275e1b99bce
remove is_autograph="no" values from XML dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
111
diff
changeset
|
344 } |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
345 // update stats |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
346 updateAttStats(att, attStats); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
347 // write xml |
78 | 348 writeAttribute(att, writer, includeNorm); |
29 | 349 } |
350 writer.writeEndElement(); | |
351 } | |
352 | |
353 /* | |
100
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
354 * write outgoing relations of this entity as XML tags |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
355 */ |
119
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
356 List<Relation> srcRels = entity.getSourceRelations(); |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
357 if (srcRels.size() > 0) { |
100
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
358 writer.writeStartElement(XMLUtil.RELATIONS); |
119
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
359 for (Relation rel : srcRels) { |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
360 // write xml (without stats) |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
361 writeRelation(rel, writer, includeNorm, null); |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
362 } |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
363 writer.writeEndElement(); |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
364 } |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
365 |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
366 /* |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
367 * write incoming relations of this entity as XML tags |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
368 */ |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
369 List<Relation> tarRels = entity.getTargetRelations(); |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
370 if (tarRels.size() > 0) { |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
371 writer.writeStartElement(XMLUtil.INVRELATIONS); |
4eac7c57e593
add entities' incoming relations to xml export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
118
diff
changeset
|
372 for (Relation rel : tarRels) { |
100
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
373 // write xml (without stats) |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
374 writeRelation(rel, writer, includeNorm, null); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
375 } |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
376 writer.writeEndElement(); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
377 } |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
378 |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
379 /* |
29 | 380 * write own value |
381 */ | |
77 | 382 String ov = entity.getOwnValue(); |
383 if (StringUtils.isNotEmpty(ov)) { | |
384 writer.writeCharacters(ov); | |
385 String nov = entity.getNormalizedOwnValue(); | |
386 if (includeNorm && StringUtils.isNotEmpty(nov) && !ov.equals(nov)) { | |
387 // write normalized value | |
388 writer.writeStartElement(XMLUtil.NORMALIZED); | |
389 writer.writeCharacters(nov); | |
390 writer.writeEndElement(); | |
391 } | |
75 | 392 } |
29 | 393 |
394 writer.writeEndElement(); | |
395 } | |
396 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
397 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
398 private static void writeAttribute(Attribute att, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException { |
29 | 399 writer.writeStartElement(XMLUtil.ATTRIBUTE); |
400 | |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
401 String name = att.getName(); |
111
71465cead59c
patch "ALIAS" attribute of ALIAS type in XML export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
110
diff
changeset
|
402 if (name.equals("ALIAS")) { |
71465cead59c
patch "ALIAS" attribute of ALIAS type in XML export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
110
diff
changeset
|
403 name = "alias"; // :-( |
71465cead59c
patch "ALIAS" attribute of ALIAS type in XML export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
110
diff
changeset
|
404 } |
71465cead59c
patch "ALIAS" attribute of ALIAS type in XML export.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
110
diff
changeset
|
405 |
29 | 406 /* |
407 * write XML attributes | |
408 */ | |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
409 writer.writeAttribute(XMLUtil.ATTRIBUTE_NAME, defaultString(name)); |
29 | 410 writer.writeAttribute(XMLUtil.ID, defaultString(att.getId())); |
411 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(att.getRowId())); | |
412 writer.writeAttribute(XMLUtil.CONTENT_TYPE, defaultString(att.getContentType())); | |
413 writer.writeAttribute(XMLUtil.VERSION, defaultString(att.getVersion())); | |
414 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(att.getModificationTime())); | |
75 | 415 if (att.getUser() != null) { |
29 | 416 writer.writeAttribute(XMLUtil.USER, att.getUser()); |
75 | 417 } |
418 if (att.getIsPublic()) { | |
29 | 419 writer.writeAttribute(XMLUtil.PUBLIC, "true"); |
75 | 420 } |
421 | |
29 | 422 /* |
423 * write value as content | |
424 */ | |
77 | 425 String ov = att.getValue(); |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
426 String ct = att.getContentType(); |
77 | 427 if (StringUtils.isNotEmpty(ov)) { |
428 writer.writeCharacters(ov); | |
429 String nov = att.getNormalizedOwnValue(); | |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
430 if (includeNorm && StringUtils.isNotEmpty(nov) && !ov.equals(nov) |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
431 && !(ct != null && ct.equals("date"))) { |
77 | 432 // write normalized value |
433 writer.writeStartElement(XMLUtil.NORMALIZED); | |
434 writer.writeCharacters(nov); | |
435 writer.writeEndElement(); | |
436 } | |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
437 boolean processed = false; |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
438 // convert endnote-id into additional bibid element |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
439 if (!processed && name.equals("endnote-id")) { |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
440 Matcher bibidMatch = bibidPattern.matcher(ov); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
441 if (bibidMatch.find()) { |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
442 String bibid = bibidMatch.group(1); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
443 writer.writeStartElement(XMLUtil.BIBID); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
444 writer.writeCharacters(bibid); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
445 writer.writeEndElement(); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
446 processed = true; |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
447 } |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
448 } |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
449 // convert any date JSON |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
450 if (!processed && ov.startsWith("{")) { |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
451 try { |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
452 JSONObject json = new JSONObject(ov); |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
453 // convert to simple isodate element |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
454 processed = writeSimpleDate(writer, json); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
455 // convert to full ismi-date element |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
456 processed = writeIsmiDate(writer, json); |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
457 } catch (JSONException e) { |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
458 // maybe not JSON... |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
459 } |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
460 } |
75 | 461 } |
1 | 462 |
29 | 463 writer.writeEndElement(); |
464 } | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
465 |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
466 |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
467 /** |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
468 * Parse JSON date object and write simple isodate element. |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
469 * |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
470 * @param writer |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
471 * @param json |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
472 * @return |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
473 * @throws JSONException |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
474 * @throws XMLStreamException |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
475 */ |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
476 private static boolean writeSimpleDate(XMLStreamWriter writer, JSONObject json) |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
477 throws JSONException, XMLStreamException { |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
478 JSONObject date = null; |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
479 if (json.has("date")) { |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
480 date = json.getJSONObject("date"); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
481 } else if (json.has("from")) { |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
482 date = json.getJSONObject("from"); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
483 } |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
484 if (date != null) { |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
485 int year = date.getInt("year"); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
486 int month = date.getInt("month"); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
487 int day = date.getInt("dayOfMonth"); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
488 DateTime dt = new DateTime(year, month, day, 0, 0); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
489 writer.writeStartElement(XMLUtil.ISODATE); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
490 writer.writeCharacters(isodateFormatter.print(dt)); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
491 writer.writeEndElement(); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
492 return true; |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
493 } |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
494 return false; |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
495 } |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
496 |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
497 |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
498 /** |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
499 * Parse JSON date object and write ismi-date element. |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
500 * |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
501 * @param writer |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
502 * @param json |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
503 * @return |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
504 * @throws JSONException |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
505 * @throws XMLStreamException |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
506 */ |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
507 private static boolean writeIsmiDate(XMLStreamWriter writer, JSONObject json) |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
508 throws JSONException, XMLStreamException { |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
509 ISMICalendar date = new ISMICalendar(json); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
510 String state = date.getState(); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
511 if (state.equals(ISMICalendar.STATE_NOT_CHECKED)) { |
125
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
512 // state="not checked" -> unspecified type |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
513 writer.writeStartElement(XMLUtil.ISMIDATE); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
514 writer.writeAttribute(XMLUtil.TYPE, "unspecified"); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
515 writer.writeCharacters(date.getDateInText()); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
516 writer.writeEndElement(); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
517 return true; |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
518 } |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
519 if (state.equals(ISMICalendar.STATE_KNOWN)) { |
125
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
520 // state="known" |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
521 writer.writeStartElement(XMLUtil.ISMIDATE); |
125
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
522 // calendar |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
523 String calendar = date.getCalendarType().toLowerCase(); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
524 writer.writeAttribute("calendar", calendar); |
125
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
525 // notes |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
526 String notes = date.getAdditionalInfo(); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
527 if (notes != null && !notes.isEmpty()) { |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
528 writer.writeAttribute("notes", notes); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
529 } |
125
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
530 // type = inputForm |
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
531 String type = date.getInputForm().toLowerCase(); |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
532 if (type.equals("year") || type.equals("range")) { |
125
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
533 writer.writeAttribute(XMLUtil.TYPE, type); |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
534 writer.writeAttribute("from", date.getFromGregorian().getDateTime().toString(isodateFormatter)); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
535 writer.writeAttribute("until", date.getUntilGregorian().getDateTime().toString(isodateFormatter)); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
536 } else if (type.equals("date")) { |
125
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
537 // date type is called "day" |
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
538 writer.writeAttribute(XMLUtil.TYPE, "day"); |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
539 writer.writeAttribute("date", date.getFromGregorian().getDateTime().toString(isodateFormatter)); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
540 } |
125
0a0de5ec3219
change date type "date" to "day".
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
122
diff
changeset
|
541 // textual date as content |
127
3e772f7f43e0
ismi-date with long month names in xml dump.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
126
diff
changeset
|
542 writer.writeCharacters(date.toLongerString()); |
122
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
543 writer.writeEndElement(); |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
544 return true; |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
545 } |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
546 return false; |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
547 } |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
548 |
8d79021099a4
XML dump with new ismi-date element.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
120
diff
changeset
|
549 |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
550 private static void writeStats(String statsTag, String entryTag, Map<String, Map<String, Long>> nodeStats, |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
551 XMLStreamWriter writer) throws XMLStreamException { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
552 // write stats tag |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
553 writer.writeStartElement(statsTag); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
554 |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
555 for (String nodeType : nodeStats.keySet()) { |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
556 Map<String, Long> attStats = nodeStats.get(nodeType); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
557 Long nodeCnt = attStats.get(ENT_KEY); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
558 // write tag for entity/attribute |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
559 writer.writeStartElement(entryTag); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
560 writer.writeAttribute(XMLUtil.OBJECT_CLASS, (nodeType == null) ? "null" : nodeType); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
561 writer.writeAttribute(XMLUtil.COUNT, nodeCnt.toString()); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
562 |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
563 // write attributes |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
564 for (String attName : attStats.keySet()) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
565 // skip ENT_KEY |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
566 if (attName.equals(ENT_KEY)) |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
567 continue; |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
568 if (attName.contains("[")) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
569 // write relation tag |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
570 writer.writeStartElement(XMLUtil.RELATION); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
571 } else { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
572 // write attribute tag |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
573 writer.writeStartElement(XMLUtil.ATTRIBUTE); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
574 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
575 writer.writeAttribute(XMLUtil.ATTRIBUTE_NAME, attName); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
576 Long attCnt = attStats.get(attName); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
577 writer.writeAttribute(XMLUtil.COUNT, attCnt.toString()); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
578 writer.writeEndElement(); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
579 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
580 // end of entity/attribute tag |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
581 writer.writeEndElement(); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
582 } |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
583 // end of stats tag |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
584 writer.writeEndElement(); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
585 } |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
586 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
587 /** |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
588 * @param objectClass |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
589 * @param entStats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
590 * @return |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
591 */ |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
592 protected static Map<String, Long> updateNodeStats(Node ent, Map<String, Map<String, Long>> entStats) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
593 String objectClass = ent.getObjectClass(); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
594 Map<String, Long> attStats = entStats.get(objectClass); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
595 if (attStats == null) { |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
596 // create new attribute stats entry |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
597 attStats = new HashMap<String, Long>(); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
598 // add key to count entities |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
599 attStats.put(ENT_KEY, 1l); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
600 // add to map |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
601 entStats.put(objectClass, attStats); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
602 } else { |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
603 // increment entity count |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
604 Long entCnt = attStats.get(ENT_KEY); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
605 attStats.put(ENT_KEY, entCnt + 1); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
606 } |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
607 return attStats; |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
608 } |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
609 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
610 /** |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
611 * @param att |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
612 * @param attStats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
613 */ |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
614 protected static void updateAttStats(Attribute att, Map<String, Long> attStats) { |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
615 String attName = att.getName(); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
616 Long cnt = attStats.get(attName); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
617 if (cnt == null) { |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
618 attStats.put(attName, 1l); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
619 } else { |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
620 attStats.put(attName, cnt + 1); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
621 } |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
622 } |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
623 |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
624 /** |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
625 * Update relation statistics. |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
626 * |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
627 * Relation stats are saved like attribute stats but with "[entity-type]" before |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
628 * or after the relation name. |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
629 * |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
630 * @param rel |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
631 * @param relStats |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
632 */ |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
633 protected static void updateRelStats(Relation rel, boolean isSrcRel, Map<String, Long> relStats) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
634 String relName = rel.getObjectClass(); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
635 if (isSrcRel) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
636 relName = relName + "[" + rel.getTargetObjectClass() + "]"; |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
637 } else { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
638 relName = "[" + rel.getSourceObjectClass() + "]" + relName; |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
639 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
640 Long cnt = relStats.get(relName); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
641 if (cnt == null) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
642 relStats.put(relName, 1l); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
643 } else { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
644 relStats.put(relName, cnt + 1); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
645 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
646 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
647 |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
648 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
649 |
1 | 650 } |