Mercurial > hg > openmind
annotate src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java @ 106:93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
author | Robert Casties <casties@mpiwg-berlin.mpg.de> |
---|---|
date | Fri, 26 Apr 2019 18:12:23 +0200 |
parents | 1149eb948036 |
children | 484be3266e54 |
rev | line source |
---|---|
1 | 1 package org.mpi.openmind.repository.utils; |
2 | |
3 import java.io.FileOutputStream; | |
4 import java.io.IOException; | |
5 import java.io.OutputStreamWriter; | |
6 import java.text.DecimalFormat; | |
7 import java.util.ArrayList; | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
8 import java.util.HashMap; |
1 | 9 import java.util.List; |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
10 import java.util.Map; |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
11 import java.util.regex.Matcher; |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
12 import java.util.regex.Pattern; |
1 | 13 |
29 | 14 import javax.xml.stream.XMLOutputFactory; |
15 import javax.xml.stream.XMLStreamException; | |
16 import javax.xml.stream.XMLStreamWriter; | |
1 | 17 |
18 import org.apache.commons.lang.StringUtils; | |
19 import org.apache.log4j.Logger; | |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
20 import org.joda.time.DateTime; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
21 import org.joda.time.format.DateTimeFormatter; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
22 import org.joda.time.format.ISODateTimeFormat; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
23 import org.json.JSONException; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
24 import org.json.JSONObject; |
1 | 25 import org.mpi.openmind.repository.bo.Attribute; |
26 import org.mpi.openmind.repository.bo.Entity; | |
27 import org.mpi.openmind.repository.bo.Node; | |
28 import org.mpi.openmind.repository.bo.Relation; | |
29 import org.mpi.openmind.repository.services.PersistenceService; | |
30 | |
31 | 31 /** |
32 * Export all entities and relations and definitions to XML. | |
33 * | |
75 | 34 * Saves (content) entities and relations (i.e. assertions) and definitions |
31 | 35 * (i.e. definition entities and relations) in separate files. |
36 * | |
37 * @author jurzua, casties | |
38 * | |
39 */ | |
1 | 40 public class OM4StreamWriter { |
41 | |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
42 protected static final String FORMAT_VERSION = "4.10"; |
87
8005f7011975
update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
86
diff
changeset
|
43 |
29 | 44 private static Logger logger = Logger.getLogger(OM4StreamWriter.class); |
45 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
46 private static final int itemsPerPage = 500; |
77 | 47 |
48 /** Include normalized own-values. */ | |
78 | 49 public static boolean includeNormalizations = true; |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
50 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
51 /** key for entity count in attribute counts map */ |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
52 private static final String ENT_KEY = "<entity-count>"; |
29 | 53 |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
54 /** formatter for isodate tag */ |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
55 public static DateTimeFormatter dateFormatter = ISODateTimeFormat.date(); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
56 |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
57 /** pattern for bibid in endnote-id attribute */ |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
58 public static final Pattern bibidPattern = Pattern.compile("#(\\d+)"); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
59 |
29 | 60 /** |
61 * Return the object's string representation or "null" if its null. | |
62 * | |
63 * @param s | |
64 * @return | |
65 */ | |
31 | 66 private static String defaultString(Object s) { |
29 | 67 if (s == null) { |
68 return "null"; | |
69 } else { | |
70 return s.toString(); | |
71 } | |
72 } | |
73 | |
74 | |
75 | 75 /** |
76 * Saves all content Entities with their Attributes and Relations in a XML file with the given fileName. | |
77 * | |
78 * @param fileName | |
79 * @param ps | |
80 */ | |
29 | 81 public static void backupEntities(String fileName, PersistenceService ps) { |
78 | 82 writeEntsAndRels(fileName, ps, Node.TYPE_ABOX, includeNormalizations); |
29 | 83 } |
84 | |
75 | 85 /** |
86 * Saves all definitions in a XML file with the given fileName. | |
87 * | |
88 * @param fileName | |
89 * @param ps | |
90 */ | |
29 | 91 public static void backupDefinitions(String fileName, PersistenceService ps) { |
78 | 92 writeEntsAndRels(fileName, ps, Node.TYPE_TBOX, false); |
29 | 93 } |
94 | |
95 /** | |
96 * Writes all entities of the given type and their relations to the XML file at fileName. | |
97 * | |
75 | 98 * Type is either TYPE_TBOX or TYPE_ABOX. |
99 * | |
29 | 100 * @param fileName |
101 * @param ps | |
75 | 102 * @param type |
29 | 103 */ |
78 | 104 private static void writeEntsAndRels(String fileName, PersistenceService ps, String type, boolean includeNorm) { |
29 | 105 OutputStreamWriter out; |
106 try { | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
107 // statistics collection Maps |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
108 Map<String, Map<String, Long>> entStats = new HashMap<String, Map<String, Long>>(); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
109 Map<String, Map<String, Long>> relStats = new HashMap<String, Map<String, Long>>(); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
110 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
111 // setup xml writer |
29 | 112 FileOutputStream fileOut = new FileOutputStream(fileName); |
113 out = new OutputStreamWriter(fileOut, "UTF-8"); | |
114 XMLOutputFactory factory = XMLOutputFactory.newInstance(); | |
115 XMLStreamWriter writer = factory.createXMLStreamWriter(out); | |
116 | |
117 int entitiesCount = 0; | |
118 | |
119 writer.writeStartDocument("UTF-8", "1.0"); | |
120 | |
121 if (type.equals(Node.TYPE_ABOX)) { | |
122 writer.writeStartElement(XMLUtil.OPENMIND_DATA); | |
87
8005f7011975
update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
86
diff
changeset
|
123 writer.writeAttribute("version", FORMAT_VERSION); |
75 | 124 // get number of content Entities |
29 | 125 entitiesCount = ps.getEntityCount(null).intValue(); |
126 } else { | |
127 writer.writeStartElement(XMLUtil.META_DATA); | |
87
8005f7011975
update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
86
diff
changeset
|
128 writer.writeAttribute("version", FORMAT_VERSION); |
75 | 129 // get number of definition Entities |
29 | 130 entitiesCount = ps.getEntityCount(Node.TYPE_TBOX).intValue(); |
131 } | |
132 | |
133 int numberOfPages = entitiesCount / itemsPerPage; | |
134 // debug: int numberOfPages = 1; | |
135 int counter = 0; | |
136 long start = System.currentTimeMillis(); | |
137 DecimalFormat df = new DecimalFormat("#.##"); | |
1 | 138 |
75 | 139 // list of Relations (filled from Entities) |
29 | 140 List<Relation> relList = new ArrayList<Relation>(); |
141 | |
142 /* | |
143 * write entities | |
144 */ | |
145 writer.writeStartElement((type.equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITIONS : XMLUtil.ENTITIES); | |
82
90f9a1c45b15
small change to xml format.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
79
diff
changeset
|
146 writer.writeAttribute("count", Integer.toString(entitiesCount)); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
147 // iterate database by pages |
29 | 148 for (int currentPage = 0; currentPage <= numberOfPages; currentPage++) { |
149 int startRecord = currentPage * itemsPerPage; | |
150 List<Entity> entities; | |
151 | |
152 if (type.equals(Node.TYPE_ABOX)) { | |
75 | 153 // get page of content Entities |
29 | 154 entities = ps.getEntityPage(null, startRecord, itemsPerPage); |
155 } else { | |
75 | 156 // get page of definition Entities |
29 | 157 entities = ps.getEntityPage(Node.TYPE_TBOX, startRecord, itemsPerPage); |
158 } | |
159 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
160 // iterate entities |
29 | 161 for (Entity ent : entities) { |
75 | 162 // write entity to XML |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
163 writeEntity(ent, writer, ps, includeNorm, entStats); |
29 | 164 // add (source)relations to list |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
165 List<Relation> rels = ent.getSourceRelations(); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
166 relList.addAll(rels); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
167 // update stats for relations |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
168 Map<String, Long> entRelStats = entStats.get(ent.getObjectClass()); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
169 for (Relation rel: rels) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
170 // update source relations |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
171 updateRelStats(rel, true, entRelStats); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
172 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
173 for (Relation rel: ent.getTargetRelations()) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
174 // update target relations |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
175 updateRelStats(rel, false, entRelStats); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
176 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
177 // count entities |
29 | 178 counter++; |
179 } | |
180 | |
181 long runtime = System.currentTimeMillis() - start; | |
182 double percent = ((double) counter / (double) entitiesCount) * 100.0; | |
183 logger.debug("(" + df.format(percent) + "%) \t[" + counter + "/" + entitiesCount + "]\t"); | |
184 logger.debug("Speed[ents/s]: " + df.format((double) counter / ((double) runtime / 1000))); | |
185 writer.flush(); | |
186 } | |
187 writer.writeEndElement(); | |
188 | |
189 /* | |
190 * write relations (from list) | |
191 */ | |
192 writer.writeStartElement(XMLUtil.RELATIONS); | |
82
90f9a1c45b15
small change to xml format.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
79
diff
changeset
|
193 writer.writeAttribute("count", Integer.toString(relList.size())); |
29 | 194 for (Relation rel : relList) { |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
195 writeRelation(rel, writer, includeNorm, relStats); |
29 | 196 } |
197 writer.writeEndElement(); | |
198 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
199 /* |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
200 * write statistics |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
201 */ |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
202 // entity stats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
203 writeStats(XMLUtil.ENTITY_STATS, XMLUtil.ENTITY, entStats, writer); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
204 // relation stats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
205 writeStats(XMLUtil.RELATION_STATS, XMLUtil.RELATION, relStats, writer); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
206 |
29 | 207 // end file. |
208 writer.writeEndElement(); | |
209 | |
210 writer.flush(); | |
211 writer.close(); | |
212 | |
213 logger.info("END Stream Writer"); | |
214 } catch (IOException e) { | |
75 | 215 logger.error(e); |
29 | 216 } catch (XMLStreamException e) { |
75 | 217 logger.error(e); |
29 | 218 } |
219 } | |
1 | 220 |
29 | 221 /** |
222 * Write OpenMind relation to XML. | |
223 * | |
224 * @param rel | |
225 * @param writer | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
226 * @param relStats |
29 | 227 * @throws XMLStreamException |
228 */ | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
229 private static void writeRelation(Relation rel, XMLStreamWriter writer, boolean includeNorm, |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
230 Map<String, Map<String, Long>> relStats) throws XMLStreamException { |
29 | 231 writer.writeStartElement(XMLUtil.RELATION); |
232 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
233 // update stats |
100
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
234 Map<String, Long> attStats = null; |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
235 if (relStats != null) { |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
236 attStats = updateNodeStats(rel, relStats); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
237 } |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
238 |
29 | 239 /* |
240 * write XML-attributes | |
241 */ | |
242 writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(rel.getObjectClass())); | |
243 writer.writeAttribute(XMLUtil.ID, defaultString(rel.getId())); | |
244 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(rel.getRowId())); | |
75 | 245 if (StringUtils.isNotEmpty(rel.getContentType())) { |
29 | 246 writer.writeAttribute(XMLUtil.CONTENT_TYPE, rel.getContentType()); |
75 | 247 } |
29 | 248 writer.writeAttribute(XMLUtil.RELATION_SOURCE_ID, defaultString(rel.getSourceId())); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
249 writer.writeAttribute(XMLUtil.RELATION_SOURCE, defaultString(rel.getSourceObjectClass())); |
29 | 250 writer.writeAttribute(XMLUtil.RELATION_TARGET_ID, defaultString(rel.getTargetId())); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
251 writer.writeAttribute(XMLUtil.RELATION_TARGET, defaultString(rel.getTargetObjectClass())); |
29 | 252 writer.writeAttribute(XMLUtil.VERSION, defaultString(rel.getVersion())); |
253 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(rel.getModificationTime())); | |
75 | 254 if (rel.getUser() != null) { |
29 | 255 writer.writeAttribute(XMLUtil.USER, rel.getUser()); |
75 | 256 } |
257 if (rel.getIsPublic()) { | |
29 | 258 writer.writeAttribute(XMLUtil.PUBLIC, "true"); |
75 | 259 } |
29 | 260 |
261 /* | |
262 * write OpenMind attributes of this relation as XML tags | |
263 */ | |
264 if (rel.getAttributes().size() > 0) { | |
265 writer.writeStartElement(XMLUtil.ATTRIBUTES); | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
266 for (Attribute att : rel.getAttributes()) { |
100
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
267 if (attStats != null) { |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
268 // update stats |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
269 updateAttStats(att, attStats); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
270 } |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
271 // write xml |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
272 writeAttribute(att, writer, includeNorm); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
273 } |
29 | 274 writer.writeEndElement(); |
275 } | |
1 | 276 |
29 | 277 /* |
278 * write own value as content | |
279 */ | |
75 | 280 if (StringUtils.isNotEmpty(rel.getOwnValue())) { |
29 | 281 writer.writeCharacters(rel.getOwnValue()); |
75 | 282 } |
29 | 283 |
284 writer.writeEndElement(); | |
285 } | |
286 | |
287 /** | |
288 * Write OpenMind entity to XML. | |
289 * | |
290 * @param entity | |
291 * @param writer | |
292 * @param ps | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
293 * @param entStats |
29 | 294 * @throws XMLStreamException |
295 */ | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
296 private static void writeEntity(Entity entity, XMLStreamWriter writer, PersistenceService ps, boolean includeNorm, |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
297 Map<String, Map<String, Long>> entStats) |
29 | 298 throws XMLStreamException { |
1 | 299 |
29 | 300 writer.writeStartElement((entity.getType().equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITION : XMLUtil.ENTITY); |
301 | |
302 if (entity.isLightweight()) { | |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
303 // make sure we have all attributes and relations |
29 | 304 entity = ps.getEntityContent(entity); |
305 } | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
306 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
307 // update stats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
308 Map<String, Long> attStats = updateNodeStats(entity, entStats); |
29 | 309 |
310 /* | |
311 * write XML attributes | |
312 */ | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
313 writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(entity.getObjectClass())); |
29 | 314 writer.writeAttribute(XMLUtil.ID, defaultString(entity.getId())); |
315 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(entity.getRowId())); | |
75 | 316 if (StringUtils.isNotEmpty(entity.getContentType())) { |
29 | 317 writer.writeAttribute(XMLUtil.CONTENT_TYPE, entity.getContentType()); |
75 | 318 } |
29 | 319 writer.writeAttribute(XMLUtil.VERSION, defaultString(entity.getVersion())); |
320 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(entity.getModificationTime())); | |
75 | 321 if (entity.getUser() != null) { |
29 | 322 writer.writeAttribute(XMLUtil.USER, entity.getUser()); |
75 | 323 } |
324 if (entity.getIsPublic()) { | |
29 | 325 writer.writeAttribute(XMLUtil.PUBLIC, "true"); |
75 | 326 } |
1 | 327 |
29 | 328 /* |
329 * write OpenMind attributes of this entity as XML tags | |
330 */ | |
331 if (entity.getAttributes().size() > 0) { | |
332 writer.writeStartElement(XMLUtil.ATTRIBUTES); | |
333 for (Attribute att : entity.getAttributes()) { | |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
334 // update stats |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
335 updateAttStats(att, attStats); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
336 // write xml |
78 | 337 writeAttribute(att, writer, includeNorm); |
29 | 338 } |
339 writer.writeEndElement(); | |
340 } | |
341 | |
342 /* | |
100
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
343 * write outgoing relations of this entity as XML tags |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
344 */ |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
345 if (entity.getSourceRelations().size() > 0) { |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
346 writer.writeStartElement(XMLUtil.RELATIONS); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
347 for (Relation rel : entity.getSourceRelations()) { |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
348 // write xml (without stats) |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
349 writeRelation(rel, writer, includeNorm, null); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
350 } |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
351 writer.writeEndElement(); |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
352 } |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
353 |
734c0d8c7369
add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
87
diff
changeset
|
354 /* |
29 | 355 * write own value |
356 */ | |
77 | 357 String ov = entity.getOwnValue(); |
358 if (StringUtils.isNotEmpty(ov)) { | |
359 writer.writeCharacters(ov); | |
360 String nov = entity.getNormalizedOwnValue(); | |
361 if (includeNorm && StringUtils.isNotEmpty(nov) && !ov.equals(nov)) { | |
362 // write normalized value | |
363 writer.writeStartElement(XMLUtil.NORMALIZED); | |
364 writer.writeCharacters(nov); | |
365 writer.writeEndElement(); | |
366 } | |
75 | 367 } |
29 | 368 |
369 writer.writeEndElement(); | |
370 } | |
371 | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
372 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
373 private static void writeAttribute(Attribute att, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException { |
29 | 374 writer.writeStartElement(XMLUtil.ATTRIBUTE); |
375 | |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
376 String name = att.getName(); |
29 | 377 /* |
378 * write XML attributes | |
379 */ | |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
380 writer.writeAttribute(XMLUtil.ATTRIBUTE_NAME, defaultString(name)); |
29 | 381 writer.writeAttribute(XMLUtil.ID, defaultString(att.getId())); |
382 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(att.getRowId())); | |
383 writer.writeAttribute(XMLUtil.CONTENT_TYPE, defaultString(att.getContentType())); | |
384 writer.writeAttribute(XMLUtil.VERSION, defaultString(att.getVersion())); | |
385 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(att.getModificationTime())); | |
75 | 386 if (att.getUser() != null) { |
29 | 387 writer.writeAttribute(XMLUtil.USER, att.getUser()); |
75 | 388 } |
389 if (att.getIsPublic()) { | |
29 | 390 writer.writeAttribute(XMLUtil.PUBLIC, "true"); |
75 | 391 } |
392 | |
29 | 393 /* |
394 * write value as content | |
395 */ | |
77 | 396 String ov = att.getValue(); |
397 if (StringUtils.isNotEmpty(ov)) { | |
398 writer.writeCharacters(ov); | |
399 String nov = att.getNormalizedOwnValue(); | |
400 if (includeNorm && StringUtils.isNotEmpty(nov) && !ov.equals(nov)) { | |
401 // write normalized value | |
402 writer.writeStartElement(XMLUtil.NORMALIZED); | |
403 writer.writeCharacters(nov); | |
404 writer.writeEndElement(); | |
405 } | |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
406 boolean processed = false; |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
407 // convert endnote-id into additional bibid element |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
408 if (!processed && name.equals("endnote-id")) { |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
409 Matcher bibidMatch = bibidPattern.matcher(ov); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
410 if (bibidMatch.find()) { |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
411 String bibid = bibidMatch.group(1); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
412 writer.writeStartElement(XMLUtil.BIBID); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
413 writer.writeCharacters(bibid); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
414 writer.writeEndElement(); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
415 processed = true; |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
416 } |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
417 } |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
418 // convert any date JSON into additional isodate element |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
419 if (!processed && ov.startsWith("{")) { |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
420 try { |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
421 JSONObject json = new JSONObject(ov); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
422 JSONObject date = null; |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
423 if (json.has("date")) { |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
424 date = json.getJSONObject("date"); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
425 } else if (json.has("from")) { |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
426 date = json.getJSONObject("from"); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
427 } |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
428 if (date != null) { |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
429 int year = date.getInt("year"); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
430 int month = date.getInt("month"); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
431 int day = date.getInt("dayOfMonth"); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
432 DateTime dt = new DateTime(year, month, day, 0, 0); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
433 writer.writeStartElement(XMLUtil.ISODATE); |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
434 writer.writeCharacters(dateFormatter.print(dt)); |
106
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
435 writer.writeEndElement(); |
93c7dbfaf062
add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
103
diff
changeset
|
436 processed = true; |
103
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
437 } |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
438 } catch (JSONException e) { |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
439 // maybe not JSON... |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
440 } |
1149eb948036
add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
100
diff
changeset
|
441 } |
75 | 442 } |
1 | 443 |
29 | 444 writer.writeEndElement(); |
445 } | |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
446 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
447 |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
448 private static void writeStats(String statsTag, String entryTag, Map<String, Map<String, Long>> nodeStats, |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
449 XMLStreamWriter writer) throws XMLStreamException { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
450 // write stats tag |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
451 writer.writeStartElement(statsTag); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
452 |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
453 for (String nodeType : nodeStats.keySet()) { |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
454 Map<String, Long> attStats = nodeStats.get(nodeType); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
455 Long nodeCnt = attStats.get(ENT_KEY); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
456 // write tag for entity/attribute |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
457 writer.writeStartElement(entryTag); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
458 writer.writeAttribute(XMLUtil.OBJECT_CLASS, (nodeType == null) ? "null" : nodeType); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
459 writer.writeAttribute(XMLUtil.COUNT, nodeCnt.toString()); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
460 |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
461 // write attributes |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
462 for (String attName : attStats.keySet()) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
463 // skip ENT_KEY |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
464 if (attName.equals(ENT_KEY)) |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
465 continue; |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
466 if (attName.contains("[")) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
467 // write relation tag |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
468 writer.writeStartElement(XMLUtil.RELATION); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
469 } else { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
470 // write attribute tag |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
471 writer.writeStartElement(XMLUtil.ATTRIBUTE); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
472 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
473 writer.writeAttribute(XMLUtil.ATTRIBUTE_NAME, attName); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
474 Long attCnt = attStats.get(attName); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
475 writer.writeAttribute(XMLUtil.COUNT, attCnt.toString()); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
476 writer.writeEndElement(); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
477 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
478 // end of entity/attribute tag |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
479 writer.writeEndElement(); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
480 } |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
481 // end of stats tag |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
482 writer.writeEndElement(); |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
483 } |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
484 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
485 /** |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
486 * @param objectClass |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
487 * @param entStats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
488 * @return |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
489 */ |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
490 protected static Map<String, Long> updateNodeStats(Node ent, Map<String, Map<String, Long>> entStats) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
491 String objectClass = ent.getObjectClass(); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
492 Map<String, Long> attStats = entStats.get(objectClass); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
493 if (attStats == null) { |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
494 // create new attribute stats entry |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
495 attStats = new HashMap<String, Long>(); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
496 // add key to count entities |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
497 attStats.put(ENT_KEY, 1l); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
498 // add to map |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
499 entStats.put(objectClass, attStats); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
500 } else { |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
501 // increment entity count |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
502 Long entCnt = attStats.get(ENT_KEY); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
503 attStats.put(ENT_KEY, entCnt + 1); |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
504 } |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
505 return attStats; |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
506 } |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
507 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
508 /** |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
509 * @param att |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
510 * @param attStats |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
511 */ |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
512 protected static void updateAttStats(Attribute att, Map<String, Long> attStats) { |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
513 String attName = att.getName(); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
514 Long cnt = attStats.get(attName); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
515 if (cnt == null) { |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
516 attStats.put(attName, 1l); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
517 } else { |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
518 attStats.put(attName, cnt + 1); |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
519 } |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
520 } |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
521 |
86
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
522 /** |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
523 * Update relation statistics. |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
524 * |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
525 * Relation stats are saved like attribute stats but with "[entity-type]" before |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
526 * or after the relation name. |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
527 * |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
528 * @param rel |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
529 * @param relStats |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
530 */ |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
531 protected static void updateRelStats(Relation rel, boolean isSrcRel, Map<String, Long> relStats) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
532 String relName = rel.getObjectClass(); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
533 if (isSrcRel) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
534 relName = relName + "[" + rel.getTargetObjectClass() + "]"; |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
535 } else { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
536 relName = "[" + rel.getSourceObjectClass() + "]" + relName; |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
537 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
538 Long cnt = relStats.get(relName); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
539 if (cnt == null) { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
540 relStats.put(relName, 1l); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
541 } else { |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
542 relStats.put(relName, cnt + 1); |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
543 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
544 } |
d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents:
82
diff
changeset
|
545 |
79
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
546 |
b0aebac0780a
put statistics about number of entities, relations and attributes in xml dump.
casties
parents:
78
diff
changeset
|
547 |
1 | 548 } |