# HG changeset patch # User casties # Date 1477413136 -7200 # Node ID c9a411c8f742fb6dbc4b9173022e93d12b5f455f # Parent fc120c3c6ce44d3699c5b3962c51b7f56da8ad2a more comments. diff -r fc120c3c6ce4 -r c9a411c8f742 src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java --- a/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java Fri Oct 21 11:52:14 2016 +0200 +++ b/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java Tue Oct 25 18:32:16 2016 +0200 @@ -24,6 +24,14 @@ import org.mpi.openmind.repository.services.utils.AttributeFilter; +/** + * Downloads a list of Diva manifest files from the repository over HTTP + * and checks if each manifest has a corresponding DIGITALIZATION object. + * Creates missing DIGITALIZATION objects. + * + * @author casties + * + */ public class DivaImportHttp { static{ @@ -40,8 +48,12 @@ public static String DIGITALIZATION = "DIGITALIZATION"; public static String userName = "diva-import"; + /** URL for listing of Diva manifest files */ + public static String scanListUrl = "https://images.rasi.mcgill.ca/data/"; + public static void execute(){ ServiceRegistry services = new ServiceRegistry(); + // data model should exist by now //createDataModel(services.getWrapper()); importData(services.getWrapper()); } @@ -122,37 +134,42 @@ private static void importData(WrapperService omService){ try { DefaultHttpClient httpclient = new DefaultHttpClient(); - String scanListUrl = "https://images.rasi.mcgill.ca/data/"; HttpGet httpGet = new HttpGet(scanListUrl); - System.out.println("Reading scan dirs from "+scanListUrl); - HttpResponse response = httpclient.execute(httpGet); try { + System.out.println("Reading scan dirs from "+scanListUrl); + // send HTTP request and read response + HttpResponse response = httpclient.execute(httpGet); if (response.getStatusLine().getStatusCode() > 200) { System.out.println("ERROR reading HTTP response: "+response.getStatusLine()); return; } HttpEntity htent = response.getEntity(); String document = 
EntityUtils.toString(htent); - // brutal HTML string parsing ;-( + // brutal HTML parsing by regex ;-( Pattern lp = Pattern.compile(""); Matcher lm = lp.matcher(document); List list = new ArrayList(); int dirs = 0; + // go through all filenames in the links while (lm.find()) { dirs += 1; String dirName = lm.group(1); System.out.println("check: "+dirName); + // create filter to search OpenMind List filters = new ArrayList(); filters.add(new AttributeFilter("name", dirName, "DIGITALIZATION")); + // get matching DIGITALIZATIONs Map res = omService.searchEntityByAttributeFilter(filters, -1); if (res.size() > 0) { //System.out.println(" exists: "+res); } else { + // no existing DIGITALIZATION - create new Entity System.out.println(" create: "+dirName); Entity digi = new Entity(Node.TYPE_ABOX, DIGITALIZATION, false); digi.setOwnValue(dirName); digi.addAttribute(new Attribute("name", "text", dirName)); digi.addAttribute(new Attribute("num_files", "text", "100")); + // add to list list.add(digi); } }