# HG changeset patch # User Robert Casties # Date 1562610007 -7200 # Node ID 742347ef84109647379042d6aecd95c0c5cad225 # Parent 93c7dbfaf062fb9a56b232a2590bdf2d49b44686 Changed DivaImportHttp for new URL and JSON format. diff -r 93c7dbfaf062 -r 742347ef8410 src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java --- a/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java Fri Apr 26 18:12:23 2019 +0200 +++ b/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java Mon Jul 08 20:20:07 2019 +0200 @@ -1,10 +1,9 @@ package org.mpi.openmind.scripts; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; @@ -15,11 +14,13 @@ import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.PatternLayout; +import org.json.JSONArray; +import org.json.JSONObject; +import org.json.JSONTokener; import org.mpi.openmind.cache.WrapperService; import org.mpi.openmind.repository.bo.Attribute; import org.mpi.openmind.repository.bo.Entity; import org.mpi.openmind.repository.bo.Node; -import org.mpi.openmind.repository.bo.Relation; import org.mpi.openmind.repository.services.ServiceRegistry; import org.mpi.openmind.repository.services.utils.AttributeFilter; @@ -49,9 +50,9 @@ public static String userName = "diva-import"; /** URL for listing of Diva menifest files */ - public static String scanListUrl = "https://images.rasi.mcgill.ca/data/"; + public static String scanListUrl = "https://ismi-imageserver-cc.mpiwg-berlin.mpg.de/iiif/manifests-internal/"; - public static void execute(){ + public static void execute() { ServiceRegistry services = new ServiceRegistry(); // data model should exist by now //createDataModel(services.getWrapper()); @@ -70,7 +71,7 @@ DefaultHttpClient httpclient = new DefaultHttpClient(); HttpGet httpGet = new HttpGet(scanListUrl); try { - System.out.println("Reading scan dirs from "+scanListUrl); + System.out.println("Reading scan manifests from "+scanListUrl); // send HTTP request and read response HttpResponse response = httpclient.execute(httpGet); if (response.getStatusLine().getStatusCode() > 200) { @@ -78,20 +79,20 @@ return; } HttpEntity htent = response.getEntity(); - String document = EntityUtils.toString(htent); - // brutal HTML parsing by regex ;-( - Pattern lp = Pattern.compile(""); - Matcher lm = lp.matcher(document); + JSONTokener jsonReader = new JSONTokener(new InputStreamReader(htent.getContent())); + // parse JSON directory index of manifest files + JSONArray files = new JSONArray(jsonReader); + int numFiles = files.length(); List list = new ArrayList(); - int dirs = 0; - // go through all filenames in the links - while (lm.find()) { - dirs += 1; - String dirName = lm.group(1); - System.out.println("check: "+dirName); + // go through all filenames in the list + for (int i = 0; i < numFiles; ++i) { + JSONObject file = files.getJSONObject(i); + String filename = file.getString("name"); + System.out.println("check: "+filename); // create filter to search OpenMind + String digiName = filename.replace(".json", ""); List filters = new ArrayList(); - AttributeFilter filter = new AttributeFilter("name", dirName, "DIGITALIZATION"); + AttributeFilter filter = new AttributeFilter("name", digiName, "DIGITALIZATION"); filter.setExactMatch(true); filters.add(filter); // get matching DIGITALIZATIONs @@ -100,10 +101,10 @@ //System.out.println(" exists: "+res); } else { // no existing DIGITALIZATION - create new Entity - System.out.println(" create: "+dirName); + System.out.println(" create: "+filename); Entity digi = new Entity(Node.TYPE_ABOX, DIGITALIZATION, false); - digi.setOwnValue(dirName); - digi.addAttribute(new Attribute("name", "text", dirName)); + digi.setOwnValue(filename); + digi.addAttribute(new Attribute("name", "text", filename)); digi.addAttribute(new Attribute("num_files", "text", "100")); // add to list list.add(digi); @@ -113,7 +114,7 @@ EntityUtils.consume(htent); // persist OpenMind entities omService.saveEntityList(list, userName); - System.out.println("Read " + dirs + " directories"); + System.out.println("Found " + numFiles + " manifests"); System.out.println("Created " + list.size() + " DIGITALIZATIONs"); System.out.println("END"); diff -r 93c7dbfaf062 -r 742347ef8410 src/main/java/org/mpi/openmind/scripts/DivaImportHttpOld.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/main/java/org/mpi/openmind/scripts/DivaImportHttpOld.java Mon Jul 08 20:20:07 2019 +0200 @@ -0,0 +1,134 @@ +package org.mpi.openmind.scripts; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.util.EntityUtils; +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; +import org.mpi.openmind.cache.WrapperService; +import org.mpi.openmind.repository.bo.Attribute; +import org.mpi.openmind.repository.bo.Entity; +import org.mpi.openmind.repository.bo.Node; +import org.mpi.openmind.repository.bo.Relation; +import org.mpi.openmind.repository.services.ServiceRegistry; +import org.mpi.openmind.repository.services.utils.AttributeFilter; + + +/** + * Downloads a list of Diva manifest files from the repository over HTTP + * and checks if each manifest has a corresponding DIGITALIZATION object. + * Creates missing DIGITALIZATION objects. + * + * @author casties + * + */ +public class DivaImportHttpOld { + + static{ + ConsoleAppender console = new ConsoleAppender(); //create appender + //configure the appender + String PATTERN = "%d [%p|%c|%C{1}] %m%n"; + console.setLayout(new PatternLayout(PATTERN)); + console.setThreshold(Level.INFO); + console.activateOptions(); + //add appender to any Logger (here is root) + Logger.getRootLogger().addAppender(console); + } + + public static String DIGITALIZATION = "DIGITALIZATION"; + public static String userName = "diva-import"; + + /** URL for listing of Diva menifest files */ + public static String scanListUrl = "https://images.rasi.mcgill.ca/data/"; + + public static void execute(){ + ServiceRegistry services = new ServiceRegistry(); + // data model should exist by now + //createDataModel(services.getWrapper()); + importData(services.getWrapper()); + } + + /** + * Downloads a list of Diva manifest files from the repository and checks if + * each manifest has a corresponding DIGITALIZATION object. Creates missing + * DIGITALIZATION objects. + * + * @param omService + */ + private static void importData(WrapperService omService){ + try { + DefaultHttpClient httpclient = new DefaultHttpClient(); + HttpGet httpGet = new HttpGet(scanListUrl); + try { + System.out.println("Reading scan dirs from "+scanListUrl); + // send HTTP request and read response + HttpResponse response = httpclient.execute(httpGet); + if (response.getStatusLine().getStatusCode() > 200) { + System.out.println("ERROR reading HTTP response: "+response.getStatusLine()); + return; + } + HttpEntity htent = response.getEntity(); + String document = EntityUtils.toString(htent); + // brutal HTML parsing by regex ;-( + Pattern lp = Pattern.compile(""); + Matcher lm = lp.matcher(document); + List list = new ArrayList(); + int dirs = 0; + // go through all filenames in the links + while (lm.find()) { + dirs += 1; + String dirName = lm.group(1); + System.out.println("check: "+dirName); + // create filter to search OpenMind + List filters = new ArrayList(); + AttributeFilter filter = new AttributeFilter("name", dirName, "DIGITALIZATION"); + filter.setExactMatch(true); + filters.add(filter); + // get matching DIGITALIZATIONs + Map res = omService.searchEntityByAttributeFilter(filters, -1); + if (res.size() > 0) { + //System.out.println(" exists: "+res); + } else { + // no existing DIGITALIZATION - create new Entity + System.out.println(" create: "+dirName); + Entity digi = new Entity(Node.TYPE_ABOX, DIGITALIZATION, false); + digi.setOwnValue(dirName); + digi.addAttribute(new Attribute("name", "text", dirName)); + digi.addAttribute(new Attribute("num_files", "text", "100")); + // add to list + list.add(digi); + } + } + // ensure http entity is fully consumed + EntityUtils.consume(htent); + // persist OpenMind entities + omService.saveEntityList(list, userName); + System.out.println("Read " + dirs + " directories"); + System.out.println("Created " + list.size() + " DIGITALIZATIONs"); + System.out.println("END"); + + } finally { + httpGet.releaseConnection(); + } + + } catch (Exception e) { + e.printStackTrace(); + } + } + + + public static void main(String[] args){ + execute(); + System.exit(0); + } +}