changeset 79:b0aebac0780a

put statistics about number of entities, relations and attributes in xml dump. tags: entity-statistics, relation-statistics.
author casties
date Fri, 03 Mar 2017 18:59:20 +0100
parents b32b176a8aad
children 4c9ceb28cfd0
files src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java
diffstat 2 files changed, 126 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java	Thu Mar 02 20:31:32 2017 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/OM4StreamWriter.java	Fri Mar 03 18:59:20 2017 +0100
@@ -5,7 +5,9 @@
 import java.io.OutputStreamWriter;
 import java.text.DecimalFormat;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import javax.xml.stream.XMLOutputFactory;
 import javax.xml.stream.XMLStreamException;
@@ -32,10 +34,13 @@
 
     private static Logger logger = Logger.getLogger(OM4StreamWriter.class);
 
-    private static int itemsPerPage = 500;
+    private static final int itemsPerPage = 500;
     
     /** Include normalized own-values. */
     public static boolean includeNormalizations = true;
+    
+    /** key for entity count in attribute counts map */
+    private static final String ENT_KEY = "<entity-count>";
 
     /**
      * Return the object's string representation or "null" if its null.
@@ -84,6 +89,11 @@
     private static void writeEntsAndRels(String fileName, PersistenceService ps, String type, boolean includeNorm) {
         OutputStreamWriter out;
         try {
+        	// statistics collection Maps
+            Map<String, Map<String, Long>> entStats = new HashMap<String, Map<String, Long>>();
+            Map<String, Map<String, Long>> relStats = new HashMap<String, Map<String, Long>>();
+            
+            // setup xml writer
             FileOutputStream fileOut = new FileOutputStream(fileName);
             out = new OutputStreamWriter(fileOut, "UTF-8");
             XMLOutputFactory factory = XMLOutputFactory.newInstance();
@@ -95,12 +105,12 @@
 
             if (type.equals(Node.TYPE_ABOX)) {
                 writer.writeStartElement(XMLUtil.OPENMIND_DATA);
-                writer.writeAttribute("version", "4.4");
+                writer.writeAttribute("version", "4.5");
                 // get number of content Entities
                 entitiesCount = ps.getEntityCount(null).intValue();
             } else {
                 writer.writeStartElement(XMLUtil.META_DATA);
-                writer.writeAttribute("version", "4.4");
+                writer.writeAttribute("version", "4.5");
                 // get number of definition Entities
                 entitiesCount = ps.getEntityCount(Node.TYPE_TBOX).intValue();
             }
@@ -119,7 +129,7 @@
              */
             writer.writeStartElement((type.equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITIONS : XMLUtil.ENTITIES);
             writer.writeAttribute("number", Integer.toString(entitiesCount));
-            // go through all pages
+            // iterate database by pages
             for (int currentPage = 0; currentPage <= numberOfPages; currentPage++) {
                 int startRecord = currentPage * itemsPerPage;
                 List<Entity> entities;
@@ -132,16 +142,14 @@
                     entities = ps.getEntityPage(Node.TYPE_TBOX, startRecord, itemsPerPage);
                 }
 
+                // iterate entities
                 for (Entity ent : entities) {
                     // write entity to XML
-                    writeEntity(ent, writer, ps, includeNorm);
+                    writeEntity(ent, writer, ps, includeNorm, entStats);
                     // add (source)relations to list
                     relList.addAll(ent.getSourceRelations());
                     
                     counter++;
-                    /* if ((counter % 50) == 0) {
-                        logger.debug("*");
-                    } */
                 }
                 
                 long runtime = System.currentTimeMillis() - start;
@@ -158,10 +166,18 @@
             writer.writeStartElement(XMLUtil.RELATIONS);
             writer.writeAttribute("number", Integer.toString(relList.size()));
             for (Relation rel : relList) {
-                writeRelation(rel, writer, includeNorm);
+                writeRelation(rel, writer, includeNorm, relStats);
             }
             writer.writeEndElement();
 
+            /*
+             * write statistics
+             */
+            // entity stats
+            writeStats(XMLUtil.ENTITY_STATS, XMLUtil.ENTITY, entStats, writer);
+            // relation stats
+            writeStats(XMLUtil.RELATION_STATS, XMLUtil.RELATION, relStats, writer);
+            
             // end file.
             writer.writeEndElement();
 
@@ -181,11 +197,16 @@
      * 
      * @param rel
      * @param writer
+     * @param relStats 
      * @throws XMLStreamException
      */
-    private static void writeRelation(Relation rel, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException {
+    private static void writeRelation(Relation rel, XMLStreamWriter writer, boolean includeNorm, 
+    		Map<String, Map<String, Long>> relStats) throws XMLStreamException {
         writer.writeStartElement(XMLUtil.RELATION);
 
+        // update stats
+        Map<String, Long> attStats = updateNodeStats(rel, relStats);
+
         /*
          * write XML-attributes
          */
@@ -211,9 +232,12 @@
          */
         if (rel.getAttributes().size() > 0) {
             writer.writeStartElement(XMLUtil.ATTRIBUTES);
-            for (Attribute att : rel.getAttributes()) {
-                writeAttribute(att, writer, includeNorm);
-            }
+			for (Attribute att : rel.getAttributes()) {
+				// update stats
+				updateAttStats(att, attStats);
+				// write xml
+				writeAttribute(att, writer, includeNorm);
+			}
             writer.writeEndElement();
         }
         
@@ -233,9 +257,11 @@
      * @param entity
      * @param writer
      * @param ps
+     * @param entStats 
      * @throws XMLStreamException
      */
-    private static void writeEntity(Entity entity, XMLStreamWriter writer, PersistenceService ps, boolean includeNorm)
+    private static void writeEntity(Entity entity, XMLStreamWriter writer, PersistenceService ps, boolean includeNorm, 
+    		Map<String, Map<String, Long>> entStats)
             throws XMLStreamException {
 
         writer.writeStartElement((entity.getType().equals(Node.TYPE_TBOX)) ? XMLUtil.DEFINITION : XMLUtil.ENTITY);
@@ -243,11 +269,14 @@
         if (entity.isLightweight()) {
             entity = ps.getEntityContent(entity);
         }
+        
+        // update stats
+        Map<String, Long> attStats = updateNodeStats(entity, entStats);
 
         /*
          * write XML attributes
          */
-        writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(entity.getObjectClass()));
+		writer.writeAttribute(XMLUtil.OBJECT_CLASS, defaultString(entity.getObjectClass()));
         writer.writeAttribute(XMLUtil.ID, defaultString(entity.getId()));
         writer.writeAttribute(XMLUtil.ROW_ID, defaultString(entity.getRowId()));
         if (StringUtils.isNotEmpty(entity.getContentType())) {
@@ -268,6 +297,9 @@
         if (entity.getAttributes().size() > 0) {
             writer.writeStartElement(XMLUtil.ATTRIBUTES);
             for (Attribute att : entity.getAttributes()) {
+            	// update stats
+            	updateAttStats(att, attStats);
+            	// write xml
                 writeAttribute(att, writer, includeNorm);
             }
             writer.writeEndElement();
@@ -291,7 +323,8 @@
         writer.writeEndElement();
     }
 
-    private static void writeAttribute(Attribute att, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException {
+
+	private static void writeAttribute(Attribute att, XMLStreamWriter writer, boolean includeNorm) throws XMLStreamException {
         writer.writeStartElement(XMLUtil.ATTRIBUTE);
 
         /*
@@ -327,4 +360,77 @@
         
         writer.writeEndElement();
     }
+
+	
+	/**
+	 * Writes the collected statistics as one element named statsTag.
+	 *
+	 * For every object class in nodeStats an entryTag element is written that carries
+	 * the object class and the node count stored under ENT_KEY, followed by one
+	 * attribute element per attribute name with its count.
+	 *
+	 * @param statsTag name of the enclosing statistics element
+	 * @param entryTag name of the element written per object class
+	 * @param nodeStats map from object class to a map from attribute name to count;
+	 *            each inner map also holds the node count under ENT_KEY
+	 * @param writer XML stream the elements are written to
+	 * @throws XMLStreamException if the writer reports an error
+	 */
+	private static void writeStats(String statsTag, String entryTag, Map<String, Map<String, Long>> nodeStats, XMLStreamWriter writer) 
+			throws XMLStreamException {
+		// write stats tag
+		writer.writeStartElement(statsTag);
+
+		// iterate over entries so each key needs no second lookup
+		for (Map.Entry<String, Map<String, Long>> nodeEntry : nodeStats.entrySet()) {
+			String nodeType = nodeEntry.getKey();
+			Map<String, Long> attStats = nodeEntry.getValue();
+			Long nodeCnt = attStats.get(ENT_KEY);
+			// write tag for entity/relation
+			writer.writeStartElement(entryTag);
+			writer.writeAttribute(XMLUtil.OBJECT_CLASS, (nodeType == null) ? "null" : nodeType);
+			writer.writeAttribute(XMLUtil.COUNT, nodeCnt.toString());
+
+			// write attributes
+			for (Map.Entry<String, Long> attEntry : attStats.entrySet()) {
+				String attName = attEntry.getKey();
+				// skip the node count; constant-first equals also tolerates a null name
+				if (ENT_KEY.equals(attName)) {
+					continue;
+				}
+				// write attribute tag, null name handled like a null object class
+				writer.writeStartElement(XMLUtil.ATTRIBUTE);
+				writer.writeAttribute(XMLUtil.ATTRIBUTE_NAME, (attName == null) ? "null" : attName);
+				writer.writeAttribute(XMLUtil.COUNT, attEntry.getValue().toString());
+				writer.writeEndElement();
+			}
+			// end of entity/relation tag
+			writer.writeEndElement();
+		}
+		// end of stats tag
+		writer.writeEndElement();
+	}
+	
+	
+	/**
+	 * Counts the given node in the statistics for its object class.
+	 *
+	 * On the first node of an object class a new attribute-count map is created,
+	 * its node count under ENT_KEY is set to 1 and the map is stored in entStats;
+	 * for later nodes the count under ENT_KEY is incremented.
+	 *
+	 * @param ent node to count
+	 * @param entStats map from object class to attribute-count map; updated in place
+	 * @return the attribute-count map for the node's object class
+	 */
+	protected static Map<String, Long> updateNodeStats(Node ent, Map<String, Map<String, Long>> entStats) {
+		String objectClass = ent.getObjectClass();
+		Map<String, Long> attStats = entStats.get(objectClass);
+		if (attStats == null) {
+			// create new attribute stats entry
+			attStats = new HashMap<String, Long>();
+			// add key to count entities (upper-case suffix: "1l" reads like eleven)
+			attStats.put(ENT_KEY, 1L);
+			// add to map
+			entStats.put(objectClass, attStats);
+		} else {
+			// increment entity count
+			Long entCnt = attStats.get(ENT_KEY);
+			attStats.put(ENT_KEY, entCnt + 1);
+		}
+		return attStats;
+	}
+
+    /**
+     * Increments the count stored for the attribute's name.
+     *
+     * @param att attribute whose name is counted
+     * @param attStats map from attribute name to count; updated in place
+     */
+    protected static void updateAttStats(Attribute att, Map<String, Long> attStats) {
+        final String key = att.getName();
+        final Long previous = attStats.get(key);
+        // a name that has no entry yet starts at one
+        final long updated = (previous == null) ? 1L : previous + 1;
+        attStats.put(key, updated);
+    }
+
+
+
 }
--- a/src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java	Thu Mar 02 20:31:32 2017 +0100
+++ b/src/main/java/org/mpi/openmind/repository/utils/XMLUtil.java	Fri Mar 03 18:59:20 2017 +0100
@@ -69,7 +69,10 @@
     public static String ASSERTION = "assertion";
     public static String NORMALIZED = "norm";
     
-    //names used by the previous version of ismi.
+    public static String ENTITY_STATS = "entity-statistics";
+    public static String RELATION_STATS = "relation-statistics";
+    public static String COUNT = "count";
+    
     public static String META_DATA = "openmind-meta";
     public static String DEFINITIONS = "definitions";
     public static String DEFINITION = "definition";