annotate src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java @ 106:93c7dbfaf062

add bibid tag to xml export of endnote-id attributes.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Fri, 26 Apr 2019 18:12:23 +0200
parents 1149eb948036
children 484be3266e54
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
jurzua
parents:
diff changeset
1 package org.mpi.openmind.repository.utils;
jurzua
parents:
diff changeset
2
jurzua
parents:
diff changeset
3 import java.io.FileOutputStream;
jurzua
parents:
diff changeset
4 import java.io.IOException;
jurzua
parents:
diff changeset
5 import java.io.OutputStreamWriter;
jurzua
parents:
diff changeset
6 import java.text.DecimalFormat;
jurzua
parents:
diff changeset
7 import java.util.ArrayList;
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
8 import java.util.HashMap;
1
jurzua
parents:
diff changeset
9 import java.util.List;
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
10 import java.util.Map;
106
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
11 import java.util.regex.Matcher;
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
12 import java.util.regex.Pattern;
1
jurzua
parents:
diff changeset
13
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
14 import javax.xml.stream.XMLOutputFactory;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
15 import javax.xml.stream.XMLStreamException;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
16 import javax.xml.stream.XMLStreamWriter;
1
jurzua
parents:
diff changeset
17
jurzua
parents:
diff changeset
18 import org.apache.commons.lang.StringUtils;
jurzua
parents:
diff changeset
19 import org.apache.log4j.Logger;
103
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
20 import org.joda.time.DateTime;
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
21 import org.joda.time.format.DateTimeFormatter;
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
22 import org.joda.time.format.ISODateTimeFormat;
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
23 import org.json.JSONException;
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
24 import org.json.JSONObject;
1
jurzua
parents:
diff changeset
25 import org.mpi.openmind.repository.bo.Attribute;
jurzua
parents:
diff changeset
26 import org.mpi.openmind.repository.bo.Entity;
jurzua
parents:
diff changeset
27 import org.mpi.openmind.repository.bo.Node;
jurzua
parents:
diff changeset
28 import org.mpi.openmind.repository.bo.Relation;
jurzua
parents:
diff changeset
29 import org.mpi.openmind.repository.services.PersistenceService;
jurzua
parents:
diff changeset
30
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
31 /**
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
32 * Export all entities and relations and definitions to XML.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
33 *
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
34 * Saves (content) entities and relations (i.e. assertions) and definitions
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
35 * (i.e. definition entities and relations) in separate files.
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
36 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
37 * @author jurzua, casties
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
38 *
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
39 */
1
jurzua
parents:
diff changeset
40 public class OM4StreamWriter {
jurzua
parents:
diff changeset
41
106
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
42 protected static final String FORMAT_VERSION = "4.10";
87
8005f7011975 update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 86
diff changeset
43
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
44 private static Logger logger = Logger.getLogger(OM4StreamWriter.class);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
45
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
46 private static final int itemsPerPage = 500;
77
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
47
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
48 /** Include normalized own-values. */
78
b32b176a8aad make normalizations configurable (static).
casties
parents: 77
diff changeset
49 public static boolean includeNormalizations = true;
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
50
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
51 /** key for entity count in attribute counts map */
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
52 private static final String ENT_KEY = "<entity-count>";
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
53
103
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
54 /** formatter for isodate tag */
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
55 public static DateTimeFormatter dateFormatter = ISODateTimeFormat.date();
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
56
106
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
57 /** pattern for bibid in endnote-id attribute */
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
58 public static final Pattern bibidPattern = Pattern.compile("#(\\d+)");
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
59
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
60 /**
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
61 * Return the object's string representation or "null" if its null.
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
62 *
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
63 * @param s
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
64 * @return
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
65 */
31
7d8ebe8ac8a2 create reader and check script for XML dumps.
casties
parents: 29
diff changeset
66 private static String defaultString(Object s) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
67 if (s == null) {
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
68 return "null";
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
69 } else {
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
70 return s.toString();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
71 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
72 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
73
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
74
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
75 /**
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
76 * Saves all content Entities with their Attributes and Relations in a XML file with the given fileName.
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
77 *
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
78 * @param fileName
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
79 * @param ps
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
80 */
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
81 public static void backupEntities(String fileName, PersistenceService ps) {
78
b32b176a8aad make normalizations configurable (static).
casties
parents: 77
diff changeset
82 writeEntsAndRels(fileName, ps, Node.TYPE_ABOX, includeNormalizations);
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
83 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
84
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
85 /**
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
86 * Saves all definitions in a XML file with the given fileName.
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
87 *
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
88 * @param fileName
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
89 * @param ps
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
90 */
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
91 public static void backupDefinitions(String fileName, PersistenceService ps) {
78
b32b176a8aad make normalizations configurable (static).
casties
parents: 77
diff changeset
92 writeEntsAndRels(fileName, ps, Node.TYPE_TBOX, false);
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
93 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
94
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
95 /**
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
96 * Writes all entities of the given type and their relations to the XML file at fileName.
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
97 *
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
98 * Type is either TYPE_TBOX or TYPE_ABOX.
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
99 *
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
100 * @param fileName
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
101 * @param ps
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
102 * @param type
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
103 */
78
b32b176a8aad make normalizations configurable (static).
casties
parents: 77
diff changeset
104 private static void writeEntsAndRels(String fileName, PersistenceService ps, String type, boolean includeNorm) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
105 OutputStreamWriter out;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
106 try {
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
107 // statistics collection Maps
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
108 Map<String, Map<String, Long>> entStats = new HashMap<String, Map<String, Long>>();
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
109 Map<String, Map<String, Long>> relStats = new HashMap<String, Map<String, Long>>();
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
110
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
111 // setup xml writer
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
112 FileOutputStream fileOut = new FileOutputStream(fileName);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
113 out = new OutputStreamWriter(fileOut, "UTF-8");
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
114 XMLOutputFactory factory = XMLOutputFactory.newInstance();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
115 XMLStreamWriter writer = factory.createXMLStreamWriter(out);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
116
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
117 int entitiesCount = 0;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
118
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
119 writer.writeStartDocument("UTF-8", "1.0");
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
120
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
121 if (type.equals(Node.TYPE_ABOX)) {
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
122 writer.writeStartElement(XMLUtil.OPENMIND_DATA);
87
8005f7011975 update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 86
diff changeset
123 writer.writeAttribute("version", FORMAT_VERSION);
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
124 // get number of content Entities
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
125 entitiesCount = ps.getEntityCount(null).intValue();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
126 } else {
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
127 writer.writeStartElement(XMLUtil.META_DATA);
87
8005f7011975 update version number in xml.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 86
diff changeset
128 writer.writeAttribute("version", FORMAT_VERSION);
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
129 // get number of definition Entities
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
130 entitiesCount = ps.getEntityCount(Node.TYPE_TBOX).intValue();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
131 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
132
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
133 int numberOfPages = entitiesCount / itemsPerPage;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
134 // debug: int numberOfPages = 1;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
135 int counter = 0;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
136 long start = System.currentTimeMillis();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
137 DecimalFormat df = new DecimalFormat("#.##");
1
jurzua
parents:
diff changeset
138
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
139 // list of Relations (filled from Entities)
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
140 List<Relation> relList = new ArrayList<Relation>();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
141
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
142 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
143 * write entities
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
144 */
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
145 writer.writeStartElement((type.equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITIONS : XMLUtil.ENTITIES);
82
90f9a1c45b15 small change to xml format.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 79
diff changeset
146 writer.writeAttribute("count", Integer.toString(entitiesCount));
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
147 // iterate database by pages
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
148 for (int currentPage = 0; currentPage <= numberOfPages; currentPage++) {
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
149 int startRecord = currentPage * itemsPerPage;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
150 List<Entity> entities;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
151
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
152 if (type.equals(Node.TYPE_ABOX)) {
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
153 // get page of content Entities
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
154 entities = ps.getEntityPage(null, startRecord, itemsPerPage);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
155 } else {
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
156 // get page of definition Entities
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
157 entities = ps.getEntityPage(Node.TYPE_TBOX, startRecord, itemsPerPage);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
158 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
159
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
160 // iterate entities
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
161 for (Entity ent : entities) {
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
162 // write entity to XML
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
163 writeEntity(ent, writer, ps, includeNorm, entStats);
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
164 // add (source)relations to list
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
165 List<Relation> rels = ent.getSourceRelations();
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
166 relList.addAll(rels);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
167 // update stats for relations
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
168 Map<String, Long> entRelStats = entStats.get(ent.getObjectClass());
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
169 for (Relation rel: rels) {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
170 // update source relations
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
171 updateRelStats(rel, true, entRelStats);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
172 }
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
173 for (Relation rel: ent.getTargetRelations()) {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
174 // update target relations
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
175 updateRelStats(rel, false, entRelStats);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
176 }
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
177 // count entities
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
178 counter++;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
179 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
180
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
181 long runtime = System.currentTimeMillis() - start;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
182 double percent = ((double) counter / (double) entitiesCount) * 100.0;
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
183 logger.debug("(" + df.format(percent) + "%) \t[" + counter + "/" + entitiesCount + "]\t");
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
184 logger.debug("Speed[ents/s]: " + df.format((double) counter / ((double) runtime / 1000)));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
185 writer.flush();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
186 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
187 writer.writeEndElement();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
188
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
189 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
190 * write relations (from list)
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
191 */
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
192 writer.writeStartElement(XMLUtil.RELATIONS);
82
90f9a1c45b15 small change to xml format.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 79
diff changeset
193 writer.writeAttribute("count", Integer.toString(relList.size()));
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
194 for (Relation rel : relList) {
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
195 writeRelation(rel, writer, includeNorm, relStats);
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
196 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
197 writer.writeEndElement();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
198
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
199 /*
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
200 * write statistics
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
201 */
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
202 // entity stats
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
203 writeStats(XMLUtil.ENTITY_STATS, XMLUtil.ENTITY, entStats, writer);
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
204 // relation stats
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
205 writeStats(XMLUtil.RELATION_STATS, XMLUtil.RELATION, relStats, writer);
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
206
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
207 // end file.
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
208 writer.writeEndElement();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
209
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
210 writer.flush();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
211 writer.close();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
212
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
213 logger.info("END Stream Writer");
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
214 } catch (IOException e) {
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
215 logger.error(e);
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
216 } catch (XMLStreamException e) {
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
217 logger.error(e);
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
218 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
219 }
1
jurzua
parents:
diff changeset
220
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
221 /**
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
222 * Write OpenMind relation to XML.
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
223 *
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
224 * @param rel
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
225 * @param writer
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
226 * @param relStats
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
227 * @throws XMLStreamException
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
228 */
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
229 private static void writeRelation(Relation rel, XMLStreamWriter writer, boolean includeNorm,
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
230 Map<String, Map<String, Long>> relStats) throws XMLStreamException {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
231 writer.writeStartElement(XMLUtil.RELATION);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
232
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
233 // update stats
100
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
234 Map<String, Long> attStats = null;
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
235 if (relStats != null) {
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
236 attStats = updateNodeStats(rel, relStats);
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
237 }
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
238
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
239 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
240 * write XML-attributes
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
241 */
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
242 writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(rel.getObjectClass()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
243 writer.writeAttribute(XMLUtil.ID, defaultString(rel.getId()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
244 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(rel.getRowId()));
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
245 if (StringUtils.isNotEmpty(rel.getContentType())) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
246 writer.writeAttribute(XMLUtil.CONTENT_TYPE, rel.getContentType());
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
247 }
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
248 writer.writeAttribute(XMLUtil.RELATION_SOURCE_ID, defaultString(rel.getSourceId()));
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
249 writer.writeAttribute(XMLUtil.RELATION_SOURCE, defaultString(rel.getSourceObjectClass()));
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
250 writer.writeAttribute(XMLUtil.RELATION_TARGET_ID, defaultString(rel.getTargetId()));
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
251 writer.writeAttribute(XMLUtil.RELATION_TARGET, defaultString(rel.getTargetObjectClass()));
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
252 writer.writeAttribute(XMLUtil.VERSION, defaultString(rel.getVersion()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
253 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(rel.getModificationTime()));
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
254 if (rel.getUser() != null) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
255 writer.writeAttribute(XMLUtil.USER, rel.getUser());
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
256 }
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
257 if (rel.getIsPublic()) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
258 writer.writeAttribute(XMLUtil.PUBLIC, "true");
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
259 }
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
260
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
261 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
262 * write OpenMind attributes of this relation as XML tags
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
263 */
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
264 if (rel.getAttributes().size() > 0) {
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
265 writer.writeStartElement(XMLUtil.ATTRIBUTES);
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
266 for (Attribute att : rel.getAttributes()) {
100
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
267 if (attStats != null) {
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
268 // update stats
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
269 updateAttStats(att, attStats);
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
270 }
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
271 // write xml
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
272 writeAttribute(att, writer, includeNorm);
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
273 }
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
274 writer.writeEndElement();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
275 }
1
jurzua
parents:
diff changeset
276
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
277 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
278 * write own value as content
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
279 */
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
280 if (StringUtils.isNotEmpty(rel.getOwnValue())) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
281 writer.writeCharacters(rel.getOwnValue());
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
282 }
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
283
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
284 writer.writeEndElement();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
285 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
286
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
287 /**
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
288 * Write OpenMind entity to XML.
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
289 *
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
290 * @param entity
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
291 * @param writer
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
292 * @param ps
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
293 * @param entStats
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
294 * @throws XMLStreamException
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
295 */
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
296 private static void writeEntity(Entity entity, XMLStreamWriter writer, PersistenceService ps, boolean includeNorm,
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
297 Map<String, Map<String, Long>> entStats)
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
298 throws XMLStreamException {
1
jurzua
parents:
diff changeset
299
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
300 writer.writeStartElement((entity.getType().equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITION : XMLUtil.ENTITY);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
301
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
302 if (entity.isLightweight()) {
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
303 // make sure we have all attributes and relations
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
304 entity = ps.getEntityContent(entity);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
305 }
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
306
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
307 // update stats
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
308 Map<String, Long> attStats = updateNodeStats(entity, entStats);
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
309
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
310 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
311 * write XML attributes
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
312 */
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
313 writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(entity.getObjectClass()));
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
314 writer.writeAttribute(XMLUtil.ID, defaultString(entity.getId()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
315 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(entity.getRowId()));
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
316 if (StringUtils.isNotEmpty(entity.getContentType())) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
317 writer.writeAttribute(XMLUtil.CONTENT_TYPE, entity.getContentType());
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
318 }
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
319 writer.writeAttribute(XMLUtil.VERSION, defaultString(entity.getVersion()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
320 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(entity.getModificationTime()));
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
321 if (entity.getUser() != null) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
322 writer.writeAttribute(XMLUtil.USER, entity.getUser());
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
323 }
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
324 if (entity.getIsPublic()) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
325 writer.writeAttribute(XMLUtil.PUBLIC, "true");
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
326 }
1
jurzua
parents:
diff changeset
327
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
328 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
329 * write OpenMind attributes of this entity as XML tags
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
330 */
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
331 if (entity.getAttributes().size() > 0) {
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
332 writer.writeStartElement(XMLUtil.ATTRIBUTES);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
333 for (Attribute att : entity.getAttributes()) {
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
334 // update stats
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
335 updateAttStats(att, attStats);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
336 // write xml
78
b32b176a8aad make normalizations configurable (static).
casties
parents: 77
diff changeset
337 writeAttribute(att, writer, includeNorm);
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
338 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
339 writer.writeEndElement();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
340 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
341
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
342 /*
100
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
343 * write outgoing relations of this entity as XML tags
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
344 */
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
345 if (entity.getSourceRelations().size() > 0) {
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
346 writer.writeStartElement(XMLUtil.RELATIONS);
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
347 for (Relation rel : entity.getSourceRelations()) {
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
348 // write xml (without stats)
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
349 writeRelation(rel, writer, includeNorm, null);
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
350 }
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
351 writer.writeEndElement();
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
352 }
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
353
734c0d8c7369 add relations-tag with source relations for each entity to XML dump format 4.8.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 87
diff changeset
354 /*
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
355 * write own value
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
356 */
77
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
357 String ov = entity.getOwnValue();
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
358 if (StringUtils.isNotEmpty(ov)) {
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
359 writer.writeCharacters(ov);
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
360 String nov = entity.getNormalizedOwnValue();
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
361 if (includeNorm && StringUtils.isNotEmpty(nov) && !ov.equals(nov)) {
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
362 // write normalized value
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
363 writer.writeStartElement(XMLUtil.NORMALIZED);
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
364 writer.writeCharacters(nov);
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
365 writer.writeEndElement();
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
366 }
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
367 }
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
368
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
369 writer.writeEndElement();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
370 }
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
371
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
372
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
373 private static void writeAttribute(Attribute att, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
374 writer.writeStartElement(XMLUtil.ATTRIBUTE);
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
375
106
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
376 String name = att.getName();
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
377 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
378 * write XML attributes
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
379 */
106
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
380 writer.writeAttribute(XMLUtil.ATTRIBUTE_NAME, defaultString(name));
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
381 writer.writeAttribute(XMLUtil.ID, defaultString(att.getId()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
382 writer.writeAttribute(XMLUtil.ROW_ID, defaultString(att.getRowId()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
383 writer.writeAttribute(XMLUtil.CONTENT_TYPE, defaultString(att.getContentType()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
384 writer.writeAttribute(XMLUtil.VERSION, defaultString(att.getVersion()));
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
385 writer.writeAttribute(XMLUtil.MODIFICATION_TIME, defaultString(att.getModificationTime()));
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
386 if (att.getUser() != null) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
387 writer.writeAttribute(XMLUtil.USER, att.getUser());
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
388 }
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
389 if (att.getIsPublic()) {
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
390 writer.writeAttribute(XMLUtil.PUBLIC, "true");
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
391 }
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
392
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
393 /*
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
394 * write value as content
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
395 */
77
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
396 String ov = att.getValue();
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
397 if (StringUtils.isNotEmpty(ov)) {
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
398 writer.writeCharacters(ov);
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
399 String nov = att.getNormalizedOwnValue();
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
400 if (includeNorm && StringUtils.isNotEmpty(nov) && !ov.equals(nov)) {
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
401 // write normalized value
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
402 writer.writeStartElement(XMLUtil.NORMALIZED);
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
403 writer.writeCharacters(nov);
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
404 writer.writeEndElement();
a59984fd3c3f add normalized own-values to xml dump.
casties
parents: 75
diff changeset
405 }
106
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
406 boolean processed = false;
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
407 // convert endnote-id into additional bibid element
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
408 if (!processed && name.equals("endnote-id")) {
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
409 Matcher bibidMatch = bibidPattern.matcher(ov);
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
410 if (bibidMatch.find()) {
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
411 String bibid = bibidMatch.group(1);
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
412 writer.writeStartElement(XMLUtil.BIBID);
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
413 writer.writeCharacters(bibid);
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
414 writer.writeEndElement();
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
415 processed = true;
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
416 }
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
417 }
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
418 // convert any date JSON into additional isodate element
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
419 if (!processed && ov.startsWith("{")) {
103
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
420 try {
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
421 JSONObject json = new JSONObject(ov);
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
422 JSONObject date = null;
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
423 if (json.has("date")) {
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
424 date = json.getJSONObject("date");
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
425 } else if (json.has("from")) {
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
426 date = json.getJSONObject("from");
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
427 }
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
428 if (date != null) {
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
429 int year = date.getInt("year");
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
430 int month = date.getInt("month");
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
431 int day = date.getInt("dayOfMonth");
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
432 DateTime dt = new DateTime(year, month, day, 0, 0);
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
433 writer.writeStartElement(XMLUtil.ISODATE);
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
434 writer.writeCharacters(dateFormatter.print(dt));
106
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
435 writer.writeEndElement();
93c7dbfaf062 add bibid tag to xml export of endnote-id attributes.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 103
diff changeset
436 processed = true;
103
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
437 }
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
438 } catch (JSONException e) {
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
439 // maybe not JSON...
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
440 }
1149eb948036 add isodate tag to XML dump format 4.9.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 100
diff changeset
441 }
75
e0be7c0030f5 cleanup and better comments.
casties
parents: 31
diff changeset
442 }
1
jurzua
parents:
diff changeset
443
29
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
444 writer.writeEndElement();
5786aa6caeb3 new XML export and test script.
casties
parents: 1
diff changeset
445 }
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
446
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
447
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
448 private static void writeStats(String statsTag, String entryTag, Map<String, Map<String, Long>> nodeStats,
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
449 XMLStreamWriter writer) throws XMLStreamException {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
450 // write stats tag
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
451 writer.writeStartElement(statsTag);
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
452
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
453 for (String nodeType : nodeStats.keySet()) {
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
454 Map<String, Long> attStats = nodeStats.get(nodeType);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
455 Long nodeCnt = attStats.get(ENT_KEY);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
456 // write tag for entity/attribute
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
457 writer.writeStartElement(entryTag);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
458 writer.writeAttribute(XMLUtil.OBJECT_CLASS, (nodeType == null) ? "null" : nodeType);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
459 writer.writeAttribute(XMLUtil.COUNT, nodeCnt.toString());
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
460
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
461 // write attributes
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
462 for (String attName : attStats.keySet()) {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
463 // skip ENT_KEY
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
464 if (attName.equals(ENT_KEY))
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
465 continue;
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
466 if (attName.contains("[")) {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
467 // write relation tag
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
468 writer.writeStartElement(XMLUtil.RELATION);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
469 } else {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
470 // write attribute tag
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
471 writer.writeStartElement(XMLUtil.ATTRIBUTE);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
472 }
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
473 writer.writeAttribute(XMLUtil.ATTRIBUTE_NAME, attName);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
474 Long attCnt = attStats.get(attName);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
475 writer.writeAttribute(XMLUtil.COUNT, attCnt.toString());
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
476 writer.writeEndElement();
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
477 }
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
478 // end of entity/attribute tag
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
479 writer.writeEndElement();
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
480 }
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
481 // end of stats tag
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
482 writer.writeEndElement();
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
483 }
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
484
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
485 /**
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
486 * @param objectClass
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
487 * @param entStats
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
488 * @return
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
489 */
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
490 protected static Map<String, Long> updateNodeStats(Node ent, Map<String, Map<String, Long>> entStats) {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
491 String objectClass = ent.getObjectClass();
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
492 Map<String, Long> attStats = entStats.get(objectClass);
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
493 if (attStats == null) {
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
494 // create new attribute stats entry
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
495 attStats = new HashMap<String, Long>();
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
496 // add key to count entities
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
497 attStats.put(ENT_KEY, 1l);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
498 // add to map
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
499 entStats.put(objectClass, attStats);
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
500 } else {
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
501 // increment entity count
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
502 Long entCnt = attStats.get(ENT_KEY);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
503 attStats.put(ENT_KEY, entCnt + 1);
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
504 }
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
505 return attStats;
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
506 }
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
507
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
508 /**
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
509 * @param att
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
510 * @param attStats
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
511 */
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
512 protected static void updateAttStats(Attribute att, Map<String, Long> attStats) {
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
513 String attName = att.getName();
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
514 Long cnt = attStats.get(attName);
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
515 if (cnt == null) {
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
516 attStats.put(attName, 1l);
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
517 } else {
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
518 attStats.put(attName, cnt + 1);
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
519 }
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
520 }
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
521
86
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
522 /**
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
523 * Update relation statistics.
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
524 *
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
525 * Relation stats are saved like attribute stats but with "[entity-type]" before
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
526 * or after the relation name.
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
527 *
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
528 * @param rel
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
529 * @param relStats
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
530 */
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
531 protected static void updateRelStats(Relation rel, boolean isSrcRel, Map<String, Long> relStats) {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
532 String relName = rel.getObjectClass();
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
533 if (isSrcRel) {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
534 relName = relName + "[" + rel.getTargetObjectClass() + "]";
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
535 } else {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
536 relName = "[" + rel.getSourceObjectClass() + "]" + relName;
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
537 }
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
538 Long cnt = relStats.get(relName);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
539 if (cnt == null) {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
540 relStats.put(relName, 1l);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
541 } else {
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
542 relStats.put(relName, cnt + 1);
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
543 }
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
544 }
d4b456623d43 Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
Robert Casties <casties@mpiwg-berlin.mpg.de>
parents: 82
diff changeset
545
79
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
546
b0aebac0780a put statistics about number of entities, relations and attributes in xml dump.
casties
parents: 78
diff changeset
547
1
jurzua
parents:
diff changeset
548 }