changeset 47:c9a411c8f742

more comments.
author casties
date Tue, 25 Oct 2016 18:32:16 +0200
parents fc120c3c6ce4
children 69c233694e76
files src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java
diffstat 1 files changed, 21 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java	Fri Oct 21 11:52:14 2016 +0200
+++ b/src/main/java/org/mpi/openmind/scripts/DivaImportHttp.java	Tue Oct 25 18:32:16 2016 +0200
@@ -24,6 +24,14 @@
 import org.mpi.openmind.repository.services.utils.AttributeFilter;
 
 
+/**
+ * Downloads a list of Diva manifest files from the repository over HTTP 
+ * and checks if each manifest has a corresponding DIGITALIZATION object. 
+ * Creates missing DIGITALIZATION objects.
+ * 
+ * @author casties
+ *
+ */
 public class DivaImportHttp {
 	
 	static{
@@ -40,8 +48,12 @@
 	public static String DIGITALIZATION = "DIGITALIZATION";
 	public static String userName = "diva-import";
 	
+	/** URL for listing of Diva manifest files */
+    public static String scanListUrl = "https://images.rasi.mcgill.ca/data/";
+	
 	public static void execute(){
 		ServiceRegistry services = new ServiceRegistry();
+		// data model should exist by now
 		//createDataModel(services.getWrapper());
 		importData(services.getWrapper());
 	}
@@ -122,37 +134,42 @@
 	private static void importData(WrapperService omService){
 		try {
 		    DefaultHttpClient httpclient = new DefaultHttpClient();
-		    String scanListUrl = "https://images.rasi.mcgill.ca/data/";
 		    HttpGet httpGet = new HttpGet(scanListUrl);
-		    System.out.println("Reading scan dirs from "+scanListUrl);
-		    HttpResponse response = httpclient.execute(httpGet);
 		    try {
+	            System.out.println("Reading scan dirs from "+scanListUrl);
+	            // send HTTP request and read response
+	            HttpResponse response = httpclient.execute(httpGet);
 		        if (response.getStatusLine().getStatusCode() > 200) {
 		            System.out.println("ERROR reading HTTP response: "+response.getStatusLine());
 		            return;
 		        }
 		        HttpEntity htent = response.getEntity();
 		        String document = EntityUtils.toString(htent);
-		        // brutal HTML string parsing ;-(
+		        // brutal HTML parsing by regex ;-(
 		        Pattern lp = Pattern.compile("<a href=\"([\\w_.]+)\\.json\">");
 		        Matcher lm = lp.matcher(document);
 	            List<Entity> list = new ArrayList<Entity>();
 	            int dirs = 0;
+	            // go through all filenames in the links
 		        while (lm.find()) {
 		            dirs += 1;
 		            String dirName = lm.group(1);
                     System.out.println("check: "+dirName);
+                    // create filter to search OpenMind
 		            List<AttributeFilter> filters = new ArrayList<AttributeFilter>();
 		            filters.add(new AttributeFilter("name", dirName, "DIGITALIZATION"));
+		            // get matching DIGITALIZATIONs
                     Map<Entity, Attribute> res = omService.searchEntityByAttributeFilter(filters, -1);
                     if (res.size() > 0) {
                         //System.out.println("  exists: "+res);
                     } else {
+                        // no existing DIGITALIZATION - create new Entity
                         System.out.println(" create: "+dirName);
                         Entity digi = new Entity(Node.TYPE_ABOX, DIGITALIZATION, false);
                         digi.setOwnValue(dirName);                        
                         digi.addAttribute(new Attribute("name", "text", dirName));
                         digi.addAttribute(new Attribute("num_files", "text", "100"));
+                        // add to list
                         list.add(digi);
                     }
 		        }