view src/main/java/org/mpi/openmind/scripts/DivaImportHttpOld.java @ 107:742347ef8410

Changed DivaImportHttp for new URL and JSON format.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Mon, 08 Jul 2019 20:20:07 +0200
parents
children
line wrap: on
line source

package org.mpi.openmind.scripts;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.mpi.openmind.cache.WrapperService;
import org.mpi.openmind.repository.bo.Attribute;
import org.mpi.openmind.repository.bo.Entity;
import org.mpi.openmind.repository.bo.Node;
import org.mpi.openmind.repository.bo.Relation;
import org.mpi.openmind.repository.services.ServiceRegistry;
import org.mpi.openmind.repository.services.utils.AttributeFilter;


/**
 * Downloads a listing of Diva manifest files from the repository over HTTP
 * and checks if each manifest has a corresponding DIGITALIZATION object.
 * Creates missing DIGITALIZATION objects.
 *
 * <p>The listing is expected to be an HTML page containing links of the form
 * {@code <a href="NAME.json">}; {@code NAME} is used as the name of the
 * DIGITALIZATION.</p>
 *
 * @author casties
 *
 */
public class DivaImportHttpOld {
	
	static{
		// log everything from INFO upwards to the console via the root logger
		ConsoleAppender console = new ConsoleAppender();
		String layoutPattern = "%d [%p|%c|%C{1}] %m%n";
		console.setLayout(new PatternLayout(layoutPattern));
		console.setThreshold(Level.INFO);
		console.activateOptions();
		Logger.getRootLogger().addAppender(console);
	}

	/** Object class of the entities that are looked up and created. */
	public static String DIGITALIZATION = "DIGITALIZATION";
	/** User name passed to the repository when saving new entities. */
	public static String userName = "diva-import";
	
	/** URL for listing of Diva manifest files */
	public static String scanListUrl = "https://images.rasi.mcgill.ca/data/";
	
	/** Matches links to JSON files in the HTML listing; group 1 is the file name without ".json". */
	private static final Pattern MANIFEST_LINK = Pattern.compile("<a href=\"([\\w_.]+)\\.json\">");
	
	/**
	 * Runs the import against the wrapper of a newly created ServiceRegistry.
	 */
	public static void execute(){
		ServiceRegistry services = new ServiceRegistry();
		// data model should exist by now
		importData(services.getWrapper());
	}
	
	/**
	 * Downloads a list of Diva manifest files from the repository and checks if
	 * each manifest has a corresponding DIGITALIZATION object. Creates missing
	 * DIGITALIZATION objects.
	 * 
	 * Errors are reported on the console; no exception is propagated to the caller.
	 * 
	 * @param omService service used to search for and save DIGITALIZATION entities
	 */
	private static void importData(WrapperService omService){
		DefaultHttpClient httpclient = new DefaultHttpClient();
		try {
			HttpGet httpGet = new HttpGet(scanListUrl);
			try {
				System.out.println("Reading scan dirs from "+scanListUrl);
				// send HTTP request and read response
				HttpResponse response = httpclient.execute(httpGet);
				// only a plain 200 carries the listing we can parse
				if (response.getStatusLine().getStatusCode() != 200) {
					System.out.println("ERROR reading HTTP response: "+response.getStatusLine());
					return;
				}
				HttpEntity htent = response.getEntity();
				if (htent == null) {
					// a response without a body cannot contain a listing
					System.out.println("ERROR reading HTTP response: no content");
					return;
				}
				String document = EntityUtils.toString(htent);
				// ensure http entity is fully consumed
				EntityUtils.consume(htent);
				
				// brutal HTML parsing by regex ;-(
				Matcher lm = MANIFEST_LINK.matcher(document);
				List<Entity> list = new ArrayList<Entity>();
				int dirs = 0;
				// go through all filenames in the links
				while (lm.find()) {
					dirs += 1;
					String dirName = lm.group(1);
					System.out.println("check: "+dirName);
					if (!digitalizationExists(omService, dirName)) {
						// no existing DIGITALIZATION - create new Entity
						System.out.println(" create: "+dirName);
						list.add(newDigitalization(dirName));
					}
				}
				// persist OpenMind entities
				omService.saveEntityList(list, userName);
				System.out.println("Read " + dirs + " directories");
				System.out.println("Created " + list.size() + " DIGITALIZATIONs");
				System.out.println("END");
				
			} finally {
				httpGet.releaseConnection();
			}
			
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// release the sockets held by the client's connection manager
			httpclient.getConnectionManager().shutdown();
		}
	}
	
	/**
	 * Checks whether a DIGITALIZATION whose "name" attribute exactly matches the given name
	 * is found by the service.
	 * 
	 * @param omService service used for the search
	 * @param name name to look for
	 * @return true if the search returned at least one entity
	 */
	private static boolean digitalizationExists(WrapperService omService, String name) {
		List<AttributeFilter> filters = new ArrayList<AttributeFilter>();
		AttributeFilter filter = new AttributeFilter("name", name, DIGITALIZATION);
		filter.setExactMatch(true);
		filters.add(filter);
		Map<Entity, Attribute> res = omService.searchEntityByAttributeFilter(filters, -1);
		return !res.isEmpty();
	}
	
	/**
	 * Builds a new, unsaved DIGITALIZATION entity with the given name.
	 * 
	 * @param name used as own value and as "name" attribute
	 * @return the new entity
	 */
	private static Entity newDigitalization(String name) {
		Entity digi = new Entity(Node.TYPE_ABOX, DIGITALIZATION, false);
		digi.setOwnValue(name);
		digi.addAttribute(new Attribute("name", "text", name));
		// NOTE(review): fixed value, presumably a placeholder - the real file count is not read here
		digi.addAttribute(new Attribute("num_files", "text", "100"));
		return digi;
	}
	
	
	/**
	 * Command line entry point: runs the import and exits the JVM with status 0.
	 * 
	 * @param args ignored
	 */
	public static void main(String[] args){
		execute();
		System.exit(0);
	}
}