changeset 23:d2d4cd129f5e

new importer for DIGITALIZATIONs using the Diva manifest HTTP endpoint.
author casties
date Mon, 27 Jun 2016 19:11:36 -0400
parents 165b1efb85cd
children 1c034e2f7367
files src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java
diffstat 1 files changed, 167 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java	Mon Jun 27 19:11:36 2016 -0400
@@ -0,0 +1,167 @@
+package org.mpi.openmind.scripts;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.util.EntityUtils;
+import org.apache.log4j.ConsoleAppender;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.log4j.PatternLayout;
+import org.mpi.openmind.cache.WrapperService;
+import org.mpi.openmind.repository.bo.Attribute;
+import org.mpi.openmind.repository.bo.Entity;
+import org.mpi.openmind.repository.bo.Node;
+import org.mpi.openmind.repository.bo.Relation;
+import org.mpi.openmind.repository.services.ServiceRegistry;
+import org.mpi.openmind.repository.services.utils.AttributeFilter;
+
+
+public class DivaImportHttp {
+	
+	static {
+		// Runs at class-load time: send log4j output of level INFO and above to the console.
+		final PatternLayout layout = new PatternLayout("%d [%p|%c|%C{1}] %m%n");
+		final ConsoleAppender appender = new ConsoleAppender();
+		appender.setLayout(layout);
+		appender.setThreshold(Level.INFO);
+		appender.activateOptions();
+		// attach the configured appender to the root logger
+		Logger.getRootLogger().addAppender(appender);
+	}
+
+	public static String DIGITALIZATION = "DIGITALIZATION"; // own value of the definition created below and object class of the imported entities
+	public static String userName = "diva-import"; // user name passed to every WrapperService save call in this class
+	
+	public static void execute() {
+		final ServiceRegistry registry = new ServiceRegistry();
+		createDataModel(registry.getWrapper()); // step 1: save the definitions
+		importData(registry.getWrapper());      // step 2: create the missing DIGITALIZATION entities
+	}
+	
+	/**
+	 * Saves the definitions this importer relies on: the DIGITALIZATION
+	 * definition with its "name" and "num_files" attributes, the relation
+	 * "is_digitalization_of" between DIGITALIZATION and CODEX, and the
+	 * "end_page" and "start_page" attributes on WITNESS.
+	 * <p>
+	 * Any exception is caught and its stack trace printed, so exceptions do
+	 * not propagate to the caller.
+	 *
+	 * @param ontology service used to look up and save the definitions
+	 */
+	private static void createDataModel(WrapperService ontology){
+		try {
+			Entity digi = new Entity(Node.TYPE_TBOX, Node.TYPE_TBOX, false);
+			digi.setOwnValue(DIGITALIZATION);
+			// keep the returned instance: its id and modification time are
+			// copied into the attribute definitions below
+			digi = ontology.saveLWDefinition(digi, userName);
+
+			saveTextDefAttribute(ontology, digi, "name");
+			saveTextDefAttribute(ontology, digi, "num_files");
+
+			//DIGI is_digitalization_of CODEX
+			Entity codex = ontology.getDefinition("CODEX");
+			Relation rel = new Relation(digi, codex, "is_digitalization_of");
+			ontology.saveDefRelation(rel, userName);
+
+			//-----------
+			Entity witness = ontology.getDefinition("WITNESS");
+			if (witness == null) {
+				// fail with a readable message instead of a bare NullPointerException
+				throw new IllegalStateException("definition WITNESS not found");
+			}
+			saveTextDefAttribute(ontology, witness, "end_page");
+			saveTextDefAttribute(ontology, witness, "start_page");
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+	}
+
+	/**
+	 * Builds an {@code Attribute(Node.TYPE_TBOX, "text", name)} for the given
+	 * definition and saves it as an attribute definition.
+	 *
+	 * @param ontology service used to save the attribute definition
+	 * @param owner    definition the attribute is attached to; supplies the
+	 *                 source id and source modification time
+	 * @param name     third constructor argument of the attribute, e.g. "name"
+	 * @throws Exception whatever {@code saveDefAttribute} throws
+	 */
+	private static void saveTextDefAttribute(WrapperService ontology, Entity owner, String name)
+			throws Exception {
+		Attribute att = new Attribute(Node.TYPE_TBOX, "text", name);
+		att.setSourceId(owner.getId());
+		att.setSourceObjectClass(Node.TYPE_TBOX);
+		att.setSourceModif(owner.getModificationTime());
+		att.setSystemStatus(Node.SYS_STATUS_CURRENT_VERSION);
+		ontology.saveDefAttribute(att, userName);
+	}
+	
+	/**
+	 * Fetches the listing at the scan URL and creates a DIGITALIZATION entity for each
+	 * linked "*.json" name not yet found by its "name" attribute; exceptions are printed.
+	 */
+	private static void importData(WrapperService omService){
+		DefaultHttpClient httpclient = new DefaultHttpClient();
+		try {
+			String scanListUrl = "https://images.rasi.mcgill.ca/data/";
+			HttpGet httpGet = new HttpGet(scanListUrl);
+			System.out.println("Reading scan dirs from "+scanListUrl);
+			HttpResponse response = httpclient.execute(httpGet);
+			try {
+				if (response.getStatusLine().getStatusCode() != 200) { // anything but 200 is reported and skipped
+					System.out.println("ERROR reading HTTP response: "+response.getStatusLine());
+					return;
+				}
+				HttpEntity htent = response.getEntity();
+				String document = EntityUtils.toString(htent);
+				Pattern lp = Pattern.compile("<a href=\"([\\w_.]+)\\.json\">"); // brutal HTML string parsing ;-(
+				Matcher lm = lp.matcher(document);
+				List<Entity> list = new ArrayList<Entity>();
+				int dirs = 0;
+				while (lm.find()) {
+					dirs += 1;
+					String dirName = lm.group(1);
+					System.out.println("check: "+dirName);
+					List<AttributeFilter> filters = new ArrayList<AttributeFilter>();
+					filters.add(new AttributeFilter("name", dirName, DIGITALIZATION)); // same class as created below
+					Map<Entity, Attribute> res = omService.searchEntityByAttributeFilter(filters, -1);
+					if (res.isEmpty()) {
+						System.out.println(" create: "+dirName);
+						Entity digi = new Entity(Node.TYPE_ABOX, DIGITALIZATION, false);
+						digi.setOwnValue(dirName);
+						digi.addAttribute(new Attribute("name", "text", dirName));
+						digi.addAttribute(new Attribute("num_files", "text", "100")); // NOTE(review): fixed value, not a counted total
+						list.add(digi);
+					}
+				}
+				EntityUtils.consume(htent); // ensure http entity is fully consumed
+				omService.saveEntityList(list, userName); // persist OpenMind entities
+				System.out.println("Read " + dirs + " directories");
+				System.out.println("Created " + list.size() + " DIGITALIZATIONs");
+				System.out.println("END");
+			} finally {
+				httpGet.releaseConnection();
+			}
+		} catch (Exception e) {
+			e.printStackTrace();
+		} finally {
+			// shut the client down so its connection manager does not outlive this call
+			httpclient.getConnectionManager().shutdown();
+		}
+	}
+	
+	
+	public static void main(String[] args){
+		execute(); // both steps inside catch and print their own exceptions
+		System.exit(0); // exits with status 0 once execute() returns; NOTE(review): presumably ends leftover service threads - confirm
+	}
+}