Mercurial > hg > openmind
view src/main/java/org/mpi/openmind/scripts/DivaImportHttpOld.java @ 107:742347ef8410
Changed DivaImportHttp for new URL and JSON format.
author | Robert Casties <casties@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 08 Jul 2019 20:20:07 +0200 |
parents | |
children |
line wrap: on
line source
package org.mpi.openmind.scripts;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.mpi.openmind.cache.WrapperService;
import org.mpi.openmind.repository.bo.Attribute;
import org.mpi.openmind.repository.bo.Entity;
import org.mpi.openmind.repository.bo.Node;
import org.mpi.openmind.repository.bo.Relation;
import org.mpi.openmind.repository.services.ServiceRegistry;
import org.mpi.openmind.repository.services.utils.AttributeFilter;

/**
 * Downloads a list of Diva manifest files from the repository over HTTP
 * and checks if each manifest has a corresponding DIGITALIZATION object.
 * Creates missing DIGITALIZATION objects.
 *
 * <p>The listing is read from {@link #scanListUrl}; new objects are saved
 * under the user name {@link #userName}.
 *
 * @author casties
 */
public class DivaImportHttpOld {

    static {
        // Attach a console appender to the root logger so that log4j output
        // of the libraries used here is visible when run as a script.
        ConsoleAppender console = new ConsoleAppender();
        String PATTERN = "%d [%p|%c|%C{1}] %m%n";
        console.setLayout(new PatternLayout(PATTERN));
        console.setThreshold(Level.INFO);
        console.activateOptions();
        Logger.getRootLogger().addAppender(console);
    }

    /** Object class of the entities that are looked up and created. */
    public static String DIGITALIZATION = "DIGITALIZATION";

    /** User name under which new entities are saved. */
    public static String userName = "diva-import";

    /** URL for listing of Diva manifest files */
    public static String scanListUrl = "https://images.rasi.mcgill.ca/data/";

    /**
     * Matches a link to a {@code .json} file in the HTML listing; group 1 is
     * the file name without the extension. Compiled once because it is constant.
     */
    private static final Pattern MANIFEST_LINK = Pattern.compile("<a href=\"([\\w_.]+)\\.json\">");

    /**
     * Creates the service registry and runs the import against its wrapper
     * service. The data model is expected to exist already; it is not created here.
     */
    public static void execute() {
        ServiceRegistry services = new ServiceRegistry();
        importData(services.getWrapper());
    }

    /**
     * Downloads a list of Diva manifest files from the repository and checks if
     * each manifest has a corresponding DIGITALIZATION object. Creates missing
     * DIGITALIZATION objects.
     *
     * <p>Any exception is caught and its stack trace printed; nothing is
     * propagated to the caller.
     *
     * @param omService service used to search for and save entities
     */
    private static void importData(WrapperService omService) {
        try {
            DefaultHttpClient httpclient = new DefaultHttpClient();
            try {
                HttpGet httpGet = new HttpGet(scanListUrl);
                try {
                    System.out.println("Reading scan dirs from "+scanListUrl);

                    // send HTTP request and read response
                    HttpResponse response = httpclient.execute(httpGet);
                    // only a plain 200 OK carries the listing we can parse
                    if (response.getStatusLine().getStatusCode() != 200) {
                        System.out.println("ERROR reading HTTP response: "+response.getStatusLine());
                        return;
                    }
                    HttpEntity htent = response.getEntity();
                    if (htent == null) {
                        // a response without a body cannot contain a listing
                        System.out.println("ERROR reading HTTP response: no content");
                        return;
                    }
                    String document = EntityUtils.toString(htent);
                    // ensure http entity is fully consumed
                    EntityUtils.consume(htent);

                    // the listing is HTML; manifest names are pulled out by regex
                    Matcher lm = MANIFEST_LINK.matcher(document);
                    List<Entity> list = new ArrayList<Entity>();
                    int dirs = 0;
                    // go through all filenames in the links
                    while (lm.find()) {
                        dirs += 1;
                        String dirName = lm.group(1);
                        System.out.println("check: "+dirName);

                        // create filter to search OpenMind for an exact name match
                        List<AttributeFilter> filters = new ArrayList<AttributeFilter>();
                        AttributeFilter filter = new AttributeFilter("name", dirName, DIGITALIZATION);
                        filter.setExactMatch(true);
                        filters.add(filter);

                        // get matching DIGITALIZATIONs
                        Map<Entity, Attribute> res = omService.searchEntityByAttributeFilter(filters, -1);
                        if (res.isEmpty()) {
                            // no existing DIGITALIZATION - create new Entity
                            System.out.println(" create: "+dirName);
                            Entity digi = new Entity(Node.TYPE_ABOX, DIGITALIZATION, false);
                            digi.setOwnValue(dirName);
                            digi.addAttribute(new Attribute("name", "text", dirName));
                            // NOTE(review): num_files is hard-coded to "100", presumably a placeholder - confirm
                            digi.addAttribute(new Attribute("num_files", "text", "100"));
                            list.add(digi);
                        }
                    }

                    // persist OpenMind entities
                    omService.saveEntityList(list, userName);
                    System.out.println("Read " + dirs + " directories");
                    System.out.println("Created " + list.size() + " DIGITALIZATIONs");
                    System.out.println("END");
                } finally {
                    httpGet.releaseConnection();
                }
            } finally {
                // release the client's connection manager and its sockets
                httpclient.getConnectionManager().shutdown();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Script entry point: runs the import and exits with status 0.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        execute();
        System.exit(0);
    }
}