Mercurial > hg > openmind
changeset 49:2d669fdd0a8b
New CsvNodeListReader for import-merge.
author | casties |
---|---|
date | Thu, 27 Oct 2016 20:21:19 +0200 |
parents | 69c233694e76 |
children | 5bf964077567 |
files | pom.xml src/main/java/org/mpi/openmind/repository/bo/Attribute.java src/main/java/org/mpi/openmind/repository/bo/Entity.java src/main/java/org/mpi/openmind/repository/bo/Node.java src/main/java/org/mpi/openmind/repository/bo/Relation.java src/main/java/org/mpi/openmind/repository/utils/CsvNodeListReader.java |
diffstat | 6 files changed, 239 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/pom.xml Tue Oct 25 18:42:05 2016 +0200 +++ b/pom.xml Thu Oct 27 20:21:19 2016 +0200 @@ -163,7 +163,17 @@ <version>2.5</version> <scope>provided</scope> </dependency> --> - + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-csv</artifactId> + <version>1.4</version> + </dependency> + <dependency> + <groupId>commons-codec</groupId> + <artifactId>commons-codec</artifactId> + <version>1.10</version> + </dependency> </dependencies> <build>
--- a/src/main/java/org/mpi/openmind/repository/bo/Attribute.java Tue Oct 25 18:42:05 2016 +0200 +++ b/src/main/java/org/mpi/openmind/repository/bo/Attribute.java Thu Oct 27 20:21:19 2016 +0200 @@ -246,4 +246,9 @@ } return "!!!ENCODING-ERROR!!!"; } + + @Override + public String getNodeType() { + return "ATTRIBUTE"; + } }
--- a/src/main/java/org/mpi/openmind/repository/bo/Entity.java Tue Oct 25 18:42:05 2016 +0200 +++ b/src/main/java/org/mpi/openmind/repository/bo/Entity.java Thu Oct 27 20:21:19 2016 +0200 @@ -1286,4 +1286,9 @@ } } + + @Override + public String getNodeType() { + return "ENTITY"; + } }
--- a/src/main/java/org/mpi/openmind/repository/bo/Node.java Tue Oct 25 18:42:05 2016 +0200 +++ b/src/main/java/org/mpi/openmind/repository/bo/Node.java Thu Oct 27 20:21:19 2016 +0200 @@ -400,4 +400,7 @@ return "!!!ENCODING-ERROR!!!"; } + public String getNodeType() { + return "NODE"; + } }
--- a/src/main/java/org/mpi/openmind/repository/bo/Relation.java Tue Oct 25 18:42:05 2016 +0200 +++ b/src/main/java/org/mpi/openmind/repository/bo/Relation.java Thu Oct 27 20:21:19 2016 +0200 @@ -317,4 +317,10 @@ this.getOwnValue(), this.getId()); } + + @Override + public String getNodeType() { + return "RELATION"; + } + }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/main/java/org/mpi/openmind/repository/utils/CsvNodeListReader.java Thu Oct 27 20:21:19 2016 +0200 @@ -0,0 +1,209 @@ +/** + * + */ +package org.mpi.openmind.repository.utils; + +import java.io.Reader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.codec.binary.StringUtils; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVRecord; +import org.mpi.openmind.repository.bo.Attribute; +import org.mpi.openmind.repository.bo.Entity; +import org.mpi.openmind.repository.bo.Node; +import org.mpi.openmind.repository.bo.Relation; + +/** + * Class that reads a CSV file into a list of Nodes. + * + * @author casties + * + */ +public class CsvNodeListReader { + + public static List<Node> readCsv(Reader input) throws Exception { + ArrayList<Node> nodeList = new ArrayList<Node>(); + HashMap<Long, Entity> entMap = new HashMap<Long, Entity>(); + HashMap<Long, Relation> relMap = new HashMap<Long, Relation>(); + // parse all records from CSV file + for (CSVRecord csvRecord : CSVFormat.DEFAULT.withHeader().parse(input)) { + // will throw exception if node_type is missing + String node_type = csvRecord.get("node_type"); + Node node = null; + + if (node_type.equals("ENTITY")) { + //System.out.println("Entity: "+csvRecord); + Entity ent = new Entity(); + // TODO: is lightweight ok if we want to add attributes? + ent.setLightweight(false); + if (csvRecord.isSet("id")) { + Long id = Long.decode(csvRecord.get("id")); + ent.setId(id); + entMap.put(id, ent); + } + nodeList.add(ent); + node = ent; + + } else if (node_type.equals("ATTRIBUTE")) { + //System.out.println("Attribute: "+csvRecord); + Attribute att = new Attribute(); + // throws exception if no source_id + Long source_id = Long.decode(csvRecord.get("source_id")); + att.setSourceId(source_id); + // save Attribute + nodeList.add(att); + // set optional properties + if (csvRecord.isSet("id")) { + Long id = Long.decode(csvRecord.get("id")); + att.setId(id); + } + if (csvRecord.isSet("source_modif")) { + Long mtime = Long.decode(csvRecord.get("source_modif")); + att.setSourceModif(mtime); + } + if (csvRecord.isSet("source_obj_class")) { + String oc = csvRecord.get("source_obj_class"); + att.setSourceObjectClass(oc); + } + node = att; + + } else if (node_type.equals("RELATION")) { + //System.out.println("Relation: "+csvRecord); + Relation rel = new Relation(); + if (csvRecord.isSet("id")) { + Long id = Long.decode(csvRecord.get("id")); + rel.setId(id); + relMap.put(id, rel); + } + // throws exception if no source_id or target_id + Long source_id = Long.decode(csvRecord.get("source_id")); + rel.setSourceId(source_id); + Long target_id = Long.decode(csvRecord.get("target_id")); + rel.setTargetId(target_id); + // set optional properties + if (csvRecord.isSet("source_modif")) { + Long mtime = Long.decode(csvRecord.get("source_modif")); + rel.setSourceModif(mtime); + } + if (csvRecord.isSet("source_obj_class")) { + String oc = csvRecord.get("source_obj_class"); + rel.setSourceObjectClass(oc); + } + if (csvRecord.isSet("target_modif")) { + Long mtime = Long.decode(csvRecord.get("target_modif")); + rel.setTargetModif(mtime); + } + if (csvRecord.isSet("target_obj_class")) { + String oc = csvRecord.get("target_obj_class"); + rel.setTargetObjectClass(oc); + } + nodeList.add(rel); + node = rel; + + } else { + // unknown node_type + throw new Exception("Unknown node_type "+node_type); + } + + /* + * set common node properties + */ + if (csvRecord.isSet("row_id")) { + Long row_id = Long.decode(csvRecord.get("row_id")); + // TODO: should we really set row_id? + node.setRowId(row_id); + } + if (csvRecord.isSet("object_class")) { + String oc = csvRecord.get("object_class"); + node.setObjectClass(oc); + } + if (csvRecord.isSet("user")) { + String user = csvRecord.get("user"); + node.setUser(user); + } + if (csvRecord.isSet("public")) { + boolean pub = csvRecord.get("public").equals("true"); + node.setIsPublic(pub); + } + if (csvRecord.isSet("type")) { + String type = csvRecord.get("type"); + node.setType(type); + } + if (csvRecord.isSet("version")) { + Long version = Long.decode(csvRecord.get("version")); + node.setVersion(version); + } + if (csvRecord.isSet("type")) { + String type = csvRecord.get("type"); + node.setType(type); + } + if (csvRecord.isSet("modification_time")) { + Long mtime = Long.decode(csvRecord.get("modification_time")); + node.setModificationTime(mtime); + } + if (csvRecord.isSet("system_status")) { + String status = csvRecord.get("system_status"); + node.setSystemStatus(status); + } + if (csvRecord.isSet("content_type")) { + String ct = csvRecord.get("content_type"); + node.setContentType(ct); + } + if (csvRecord.isSet("own_value_b64")) { + // get base64-encoded own_value + String ov_b64 = csvRecord.get("own_value_b64"); + // decode + byte[] ov_b = Base64.decodeBase64(ov_b64); + String ov = StringUtils.newStringUtf8(ov_b); + node.setOwnValue(ov); + } + + } + /* + * re-read list and attach attributes and relations to entities + */ + @SuppressWarnings("unchecked") + List<Node> oldNodeList = (List<Node>) nodeList.clone(); + for (Node n : oldNodeList) { + if (n.getNodeType().equals("ATTRIBUTE")) { + Attribute att = (Attribute) n; + Long source_id = att.getSourceId(); + if (entMap.containsKey(source_id)) { + Entity ent = entMap.get(source_id); + // add attribute to entity + ent.addAttribute(att); + // remove attribute from list + nodeList.remove(att); + } else if (relMap.containsKey(source_id)) { + Relation rel = relMap.get(source_id); + // add attribute to relation + rel.addAttribute(att); + // remove attribute from list + nodeList.remove(att); + } + } else if (n.getNodeType().equals("RELATION")) { + Relation rel = (Relation) n; + Long source_id = rel.getSourceId(); + if (entMap.containsKey(source_id)) { + Entity ent = entMap.get(source_id); + // add attribute to entity + ent.addSourceRelation(rel); + } + Long target_id = rel.getTargetId(); + if (entMap.containsKey(target_id)) { + Entity ent = entMap.get(target_id); + // add attribute to entity + ent.addTargetRelation(rel); + } + } + + } + + return nodeList; + + } +}