Mercurial > hg > openmind
changeset 79:b0aebac0780a
Put statistics about the number of entities, relations and attributes in the XML dump.
tags: entity-statistics, relation-statistics.
author | casties |
---|---|
date | Fri, 03 Mar 2017 18:59:20 +0100 |
parents | b32b176a8aad |
children | 4c9ceb28cfd0 |
files | src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java |
diffstat | 2 files changed, 126 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java Thu Mar 02 20:31:32 2017 +0100 +++ b/src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java Fri Mar 03 18:59:20 2017 +0100 @@ -5,7 +5,9 @@ import java.io.OutputStreamWriter; import java.text.DecimalFormat; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamException; @@ -32,10 +34,13 @@ private static Logger logger = Logger.getLogger(OM4StreamWriter.class); - private static int itemsPerPage = 500; + private static final int itemsPerPage = 500; /** Include normalized own-values. */ public static boolean includeNormalizations = true; + + /** key for entity count in attribute counts map */ + private static final String ENT_KEY = "<entity-count>"; /** * Return the object's string representation or "null" if its null. @@ -84,6 +89,11 @@ private static void writeEntsAndRels(String fileName, PersistenceService ps, String type, boolean includeNorm) { OutputStreamWriter out; try { + // statistics collection Maps + Map<String, Map<String, Long>> entStats = new HashMap<String, Map<String, Long>>(); + Map<String, Map<String, Long>> relStats = new HashMap<String, Map<String, Long>>(); + + // setup xml writer FileOutputStream fileOut = new FileOutputStream(fileName); out = new OutputStreamWriter(fileOut, "UTF-8"); XMLOutputFactory factory = XMLOutputFactory.newInstance(); @@ -95,12 +105,12 @@ if (type.equals(Node.TYPE_ABOX)) { writer.writeStartElement(XMLUtil.OPENMIND_DATA); - writer.writeAttribute("version", "4.4"); + writer.writeAttribute("version", "4.5"); // get number of content Entities entitiesCount = ps.getEntityCount(null).intValue(); } else { writer.writeStartElement(XMLUtil.META_DATA); - writer.writeAttribute("version", "4.4"); + writer.writeAttribute("version", "4.5"); // get number of definition Entities entitiesCount = 
ps.getEntityCount(Node.TYPE_TBOX).intValue(); } @@ -119,7 +129,7 @@ */ writer.writeStartElement((type.equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITIONS : XMLUtil.ENTITIES); writer.writeAttribute("number", Integer.toString(entitiesCount)); - // go through all pages + // iterate database by pages for (int currentPage = 0; currentPage <= numberOfPages; currentPage++) { int startRecord = currentPage * itemsPerPage; List<Entity> entities; @@ -132,16 +142,14 @@ entities = ps.getEntityPage(Node.TYPE_TBOX, startRecord, itemsPerPage); } + // iterate entities for (Entity ent : entities) { // write entity to XML - writeEntity(ent, writer, ps, includeNorm); + writeEntity(ent, writer, ps, includeNorm, entStats); // add (source)relations to list relList.addAll(ent.getSourceRelations()); counter++; - /* if ((counter % 50) == 0) { - logger.debug("*"); - } */ } long runtime = System.currentTimeMillis() - start; @@ -158,10 +166,18 @@ writer.writeStartElement(XMLUtil.RELATIONS); writer.writeAttribute("number", Integer.toString(relList.size())); for (Relation rel : relList) { - writeRelation(rel, writer, includeNorm); + writeRelation(rel, writer, includeNorm, relStats); } writer.writeEndElement(); + /* + * write statistics + */ + // entity stats + writeStats(XMLUtil.ENTITY_STATS, XMLUtil.ENTITY, entStats, writer); + // relation stats + writeStats(XMLUtil.RELATION_STATS, XMLUtil.RELATION, relStats, writer); + // end file. 
writer.writeEndElement(); @@ -181,11 +197,16 @@ * * @param rel * @param writer + * @param relStats * @throws XMLStreamException */ - private static void writeRelation(Relation rel, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException { + private static void writeRelation(Relation rel, XMLStreamWriter writer, boolean includeNorm, + Map<String, Map<String, Long>> relStats) throws XMLStreamException { writer.writeStartElement(XMLUtil.RELATION); + // update stats + Map<String, Long> attStats = updateNodeStats(rel, relStats); + /* * write XML-attributes */ @@ -211,9 +232,12 @@ */ if (rel.getAttributes().size() > 0) { writer.writeStartElement(XMLUtil.ATTRIBUTES); - for (Attribute att : rel.getAttributes()) { - writeAttribute(att, writer, includeNorm); - } + for (Attribute att : rel.getAttributes()) { + // update stats + updateAttStats(att, attStats); + // write xml + writeAttribute(att, writer, includeNorm); + } writer.writeEndElement(); } @@ -233,9 +257,11 @@ * @param entity * @param writer * @param ps + * @param entStats * @throws XMLStreamException */ - private static void writeEntity(Entity entity, XMLStreamWriter writer, PersistenceService ps, boolean includeNorm) + private static void writeEntity(Entity entity, XMLStreamWriter writer, PersistenceService ps, boolean includeNorm, + Map<String, Map<String, Long>> entStats) throws XMLStreamException { writer.writeStartElement((entity.getType().equals(Node.TYPE_TBOX)) ? 
XMLUtil.DEFINITION : XMLUtil.ENTITY); @@ -243,11 +269,14 @@ if (entity.isLightweight()) { entity = ps.getEntityContent(entity); } + + // update stats + Map<String, Long> attStats = updateNodeStats(entity, entStats); /* * write XML attributes */ - writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(entity.getObjectClass())); + writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(entity.getObjectClass())); writer.writeAttribute(XMLUtil.ID, defaultString(entity.getId())); writer.writeAttribute(XMLUtil.ROW_ID, defaultString(entity.getRowId())); if (StringUtils.isNotEmpty(entity.getContentType())) { @@ -268,6 +297,9 @@ if (entity.getAttributes().size() > 0) { writer.writeStartElement(XMLUtil.ATTRIBUTES); for (Attribute att : entity.getAttributes()) { + // update stats + updateAttStats(att, attStats); + // write xml writeAttribute(att, writer, includeNorm); } writer.writeEndElement(); @@ -291,7 +323,8 @@ writer.writeEndElement(); } - private static void writeAttribute(Attribute att, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException { + + private static void writeAttribute(Attribute att, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException { writer.writeStartElement(XMLUtil.ATTRIBUTE); /* @@ -327,4 +360,77 @@ writer.writeEndElement(); } + + + private static void writeStats(String statsTag, String entryTag, Map<String, Map<String, Long>> nodeStats, XMLStreamWriter writer) + throws XMLStreamException { + // write stats tag + writer.writeStartElement(statsTag); + + for (String nodeType : nodeStats.keySet()) { + Map<String, Long> attStats = nodeStats.get(nodeType); + Long nodeCnt = attStats.get(ENT_KEY); + // write tag for entity/attribute + writer.writeStartElement(entryTag); + writer.writeAttribute(XMLUtil.OBJECT_CLASS, (nodeType == null)?"null":nodeType); + writer.writeAttribute(XMLUtil.COUNT, nodeCnt.toString()); + + // write attributes + for (String attName : attStats.keySet()) { + // skip ENT_KEY + if 
(attName.equals(ENT_KEY)) continue; + // write attribute tag + writer.writeStartElement(XMLUtil.ATTRIBUTE); + writer.writeAttribute(XMLUtil.ATTRIBUTE_NAME, attName); + Long attCnt = attStats.get(attName); + writer.writeAttribute(XMLUtil.COUNT, attCnt.toString()); + writer.writeEndElement(); + } + // end of entity/attribute tag + writer.writeEndElement(); + } + // end of stats tag + writer.writeEndElement(); + } + + + /** + * @param objectClass + * @param entStats + * @return + */ + protected static Map<String, Long> updateNodeStats(Node ent, Map<String, Map<String, Long>> entStats) { + String objectClass = ent.getObjectClass(); + Map<String, Long> attStats = entStats.get(objectClass); + if (attStats == null) { + // create new attribute stats entry + attStats = new HashMap<String, Long>(); + // add key to count entities + attStats.put(ENT_KEY, 1l); + // add to map + entStats.put(objectClass, attStats); + } else { + // increment entity count + Long entCnt = attStats.get(ENT_KEY); + attStats.put(ENT_KEY, entCnt + 1); + } + return attStats; + } + + /** + * @param att + * @param attStats + */ + protected static void updateAttStats(Attribute att, Map<String, Long> attStats) { + String attName = att.getName(); + Long cnt = attStats.get(attName); + if (cnt == null) { + attStats.put(attName, 1l); + } else { + attStats.put(attName, cnt + 1); + } + } + + + }
--- a/src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java Thu Mar 02 20:31:32 2017 +0100 +++ b/src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java Fri Mar 03 18:59:20 2017 +0100 @@ -69,7 +69,10 @@ public static String ASSERTION = "assertion"; public static String NORMALIZED = "norm"; - //names used by the previous version of ismi. + public static String ENTITY_STATS = "entity-statistics"; + public static String RELATION_STATS = "relation-statistics"; + public static String COUNT = "count"; + public static String META_DATA = "openmind-meta"; public static String DEFINITIONS = "definitions"; public static String DEFINITION = "definition";