Mercurial > hg > openmind
changeset 47:c9a411c8f742
more comments.
author | casties |
---|---|
date | Tue, 25 Oct 2016 18:32:16 +0200 |
parents | fc120c3c6ce4 |
children | 69c233694e76 |
files | src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java |
diffstat | 1 files changed, 21 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java Fri Oct 21 11:52:14 2016 +0200
+++ b/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java Tue Oct 25 18:32:16 2016 +0200
@@ -24,6 +24,14 @@
 import org.mpi.openmind.repository.services.utils.AttributeFilter;


+/**
+ * Downloads a list of Diva manifest files from the repository over HTTP
+ * and checks if each manifest has a corresponding DIGITALIZATION object.
+ * Creates missing DIGITALIZATION objects.
+ *
+ * @author casties
+ *
+ */
 public class DivaImportHttp {

     static{
@@ -40,8 +48,12 @@
     public static String DIGITALIZATION = "DIGITALIZATION";
     public static String userName = "diva-import";

+    /** URL for listing of Diva menifest files */
+    public static String scanListUrl = "https://images.rasi.mcgill.ca/data/";
+
     public static void execute(){
         ServiceRegistry services = new ServiceRegistry();
+        // data model should exist by now
         //createDataModel(services.getWrapper());
         importData(services.getWrapper());
     }
@@ -122,37 +134,42 @@
     private static void importData(WrapperService omService){
         try {
             DefaultHttpClient httpclient = new DefaultHttpClient();
-            String scanListUrl = "https://images.rasi.mcgill.ca/data/";
             HttpGet httpGet = new HttpGet(scanListUrl);
-            System.out.println("Reading scan dirs from "+scanListUrl);
-            HttpResponse response = httpclient.execute(httpGet);
             try {
+                System.out.println("Reading scan dirs from "+scanListUrl);
+                // send HTTP request and read response
+                HttpResponse response = httpclient.execute(httpGet);
                 if (response.getStatusLine().getStatusCode() > 200) {
                     System.out.println("ERROR reading HTTP response: "+response.getStatusLine());
                     return;
                 }
                 HttpEntity htent = response.getEntity();
                 String document = EntityUtils.toString(htent);
-                // brutal HTML string parsing ;-(
+                // brutal HTML parsing by regex ;-(
                 Pattern lp = Pattern.compile("<a href=\"([\\w_.]+)\\.json\">");
                 Matcher lm = lp.matcher(document);
                 List<Entity> list = new ArrayList<Entity>();
                 int dirs = 0;
+                // go through all filenames in the links
                 while (lm.find()) {
                     dirs += 1;
                     String dirName = lm.group(1);
                     System.out.println("check: "+dirName);
+                    // create filter to search OpenMind
                     List<AttributeFilter> filters = new ArrayList<AttributeFilter>();
                     filters.add(new AttributeFilter("name", dirName, "DIGITALIZATION"));
+                    // get matching DIGITALIZATIONs
                     Map<Entity, Attribute> res = omService.searchEntityByAttributeFilter(filters, -1);
                     if (res.size() > 0) {
                         //System.out.println(" exists: "+res);
                     } else {
+                        // no existing DIGITALIZATION - create new Entity
                         System.out.println(" create: "+dirName);
                         Entity digi = new Entity(Node.TYPE_ABOX, DIGITALIZATION, false);
                         digi.setOwnValue(dirName);
                         digi.addAttribute(new Attribute("name", "text", dirName));
                         digi.addAttribute(new Attribute("num_files", "text", "100"));
+                        // add to list
                         list.add(digi);
                     }
                 }