diff src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 2:fab8e78184fa

minor
author dwinter
date Mon, 10 Jan 2011 12:42:27 +0100
parents c6929e63b0b8
children 58b52df9763c
line wrap: on
line diff
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java	Fri Nov 26 09:09:25 2010 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java	Mon Jan 10 12:42:27 2011 +0100
@@ -3,11 +3,12 @@
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
 
-
+import org.apache.http.HttpEntity;
 import org.apache.http.HttpResponse;
 import org.apache.log4j.BasicConfigurator;
 import org.apache.log4j.Level;
@@ -26,7 +27,8 @@
 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
 
 public class ESciDocDataHarvester {
-	
+	static int MAX_REC=1000; // page size: sent as maximumRecords and used as the startRecord stride in releaseAndSubmitObjects
+	//static int MAX_REC=5;
 	protected Logger logger = Logger.getRootLogger();
 	protected Importer importer;
 	protected EScidocBasicHandler connector;
@@ -35,134 +37,197 @@
 	private String echoContext;
 	private Logger addedFile = Logger.getLogger("addedFilesLogger");
 	private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");
-	
-	
-	public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{
-		this.importer=importer;
-		this.transformer=transformer;
-		this.connector=connector;
-		this.tools=new EScidocTools(connector);
-		this.echoContext= context;
-	
-		
+	/** Stores the importer, transformer and connector, builds an EScidocTools on the connector, and keeps the context path (e.g. "/ir/context/escidoc:1001"). */
+	public ESciDocDataHarvester(Importer importer, Transformer transformer,
+			EScidocBasicHandler connector, String context) throws IOException {
+		this.importer = importer;
+		this.transformer = transformer;
+		this.connector = connector;
+		this.tools = new EScidocTools(connector); // helper built on the same connector
+		this.echoContext = context; // later passed to the existence lookups and set on every harvested object
+
 	}
-	public Boolean readObjectsFromInstance(String type) throws Exception{
+	/** Transforms and creates an eSciDoc item for each imported object of the given type unless the connector already finds it in the context; always returns true. */
+	public Boolean readObjectsFromInstance(String type) throws Exception {
 		ArrayList<String> addedObjects = new ArrayList<String>();
 		ArrayList<String> notAddedObjects = new ArrayList<String>();
-		for (ECHOObject obj: importer.getObjectList(type)){
-			
-			
-			if (ECHORessource.class.isInstance(obj)){
-			try {
-				if (connector.alreadyExists("/md-records/md-record/admin/archivePath",((ECHORessource)obj).archivePath,echoContext)){
-					logger.debug("already exist:"+((ECHORessource)obj).archivePath);
+		for (ECHOObject obj : importer.getObjectList(type)) {
+
+			if (ECHORessource.class.isInstance(obj)) { // only resources carry an archivePath to check
+				try {
+					if (connector.alreadyExists(
+							"/md-records/md-record/admin/archivePath",
+							((ECHORessource) obj).archivePath, echoContext)) {
+						logger.debug("already exist:"
+								+ ((ECHORessource) obj).archivePath);
+						continue; // skip: an item with this archivePath is already in the context
+					}
+				} catch (Exception e) { // lookup failed: the object is skipped (see continue below)
+					logger.debug("already exist error");
+					e.printStackTrace();
 					continue;
 				}
-			} catch (Exception e) {
-				logger.debug("already exist error");
-				e.printStackTrace();
-				continue;
 			}
-			}
-			
-			obj.context=echoContext;
-			
-			String contid=connector.getIDfromPID(obj.pid,echoContext);
-			if (contid!=null){
-				System.out.println("------- belongsTo:"+contid);
+
+			obj.context = echoContext;
+
+			String contid = connector.getIDfromPID(obj.pid, echoContext);
+			if (contid != null) { // an ID was found for this PID: only report it, do not create a new item
+				System.out.println("------- belongsTo:" + contid);
 			} else {
-			
-			eSciDocXmlObject escidocItem = transformer.transform(obj);
-			logger.info(escidocItem.printXML());
-			// TODO write PID to back to echo-obj
-			Boolean result = connector.createItem(escidocItem);
-			if (result){
-				addedObjects.add(escidocItem.getESciDocId());
-				addedFile.debug(escidocItem.getESciDocId()+"\n");
-				//addedFile.write(escidocItem.getESciDocId()+"\n");
-				//addedFile.flush();
-			
-			}else {
-				notAddedObjects.add(obj.echoUrl);
-				notAddedFile.debug(obj.echoUrl);
-				//notAddedFile.write(obj.echoUrl+"\n");
-				//notAddedFile.flush();
-			}
-			//if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){
-			//	logger.info("PID already exists:"+obj); 
-			//} else if (result == ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){
-			//	logger.info("Object with reference to the same digital object already exists:"+obj);
-			//}
-			
+
+				eSciDocXmlObject escidocItem = transformer.transform(obj);
+				logger.info(escidocItem.printXML());
+				// TODO write PID back to echo-obj
+				Boolean result = connector.createItem(escidocItem); // NOTE(review): unboxed in the if below; a null result would throw NullPointerException
+				if (result) {
+					addedObjects.add(escidocItem.getESciDocId());
+					addedFile.debug(escidocItem.getESciDocId() + "\n");
+					// addedFile.write(escidocItem.getESciDocId()+"\n");
+					// addedFile.flush();
+
+				} else {
+					notAddedObjects.add(obj.echoUrl); // failures are recorded by echoUrl, successes by eSciDoc ID
+					notAddedFile.debug(obj.echoUrl);
+					// notAddedFile.write(obj.echoUrl+"\n");
+					// notAddedFile.flush();
+				}
+				// if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){
+				// logger.info("PID already exists:"+obj);
+				// } else if (result ==
+				// ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){
+				// logger.info("Object with reference to the same digital object already exists:"+obj);
+				// }
+
 			}
 		}
-		if(logger.getLevel()==Level.DEBUG){
-			for (String addedObject:addedObjects){
+		if (logger.getLevel() == Level.DEBUG) { // exact match only: any other level, even a finer one, skips this dump
+			for (String addedObject : addedObjects) {
 				logger.debug(addedObject);
 			}
 		}
-		
-//		File outFile = new File("/tmp/import.out");
-//		FileWriter fw = new FileWriter(outFile);
-//		for (String addedObject:addedObjects){
-//			fw.write(addedObject+"\n");
-//		}
-//		for (String addedObject:notAddedObjects){
-//			fw.write(addedObject+"\n");
-//		}
-//		fw.close();
+
+		// File outFile = new File("/tmp/import.out");
+		// FileWriter fw = new FileWriter(outFile);
+		// for (String addedObject:addedObjects){
+		// fw.write(addedObject+"\n");
+		// }
+		// for (String addedObject:notAddedObjects){
+		// fw.write(addedObject+"\n");
+		// }
+		// fw.close();
 		return true;
 	}
 
-	public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{
-		for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){
-			HttpResponse res = connector.submitAnObject(obj,"first release");
-			logger.debug(res.getStatusLine());
-			if (res.getStatusLine().getStatusCode()!=200){
-				logger.debug("Can not submit:"+obj.getESciDocId());
-				//res.getEntity().consumeContent(); // necessary to release the conneciton
-			
-			}
-			res.getEntity().consumeContent(); // necessary to release the conneciton
-			
-			if (!connector.upDateObject(obj)){
-				logger.debug("Can not update:"+obj.getESciDocId());
-				//continue;
+	/** Pages through the filter result in chunks of MAX_REC and submits and/or releases every object found, depending on mode.
+	 * @param command filter URL path (e.g. "/ir/context/escidoc:1001/resources/members"); the paging and status query string is appended to it
+	 * @param objectXPath XPath selecting the objects in the filter result (e.g. "//escidocItem:item")
+	 * @param mode 0: only submit, 1: only release, 2: submit and then release; modes 0 and 2 query status "pending", any other value queries "submitted"
+	 * @throws Exception from any connector call outside the re-fetch try block below
+	 */
+	public void releaseAndSubmitObjects(String command, String objectXPath,int mode)
+			throws Exception {
+
+		Integer numberOfHits = connector.getNumberOfHitsFromFilterResult(
+				command, objectXPath,mode); // NOTE(review): unboxed in the division below; a null result would throw NullPointerException
+		
+		
+		int tausend = ((numberOfHits-1) / MAX_REC); // zero-based index of the last page; integer division gives 0 for 0 hits, so one page is still requested
+		
+		String queryRestrict="";
+		if(mode==0 | mode==2){ // non-short-circuit | on booleans; same result as || here
+			queryRestrict="query=%22/properties/version/status%22=pending";
+		} else {
+			queryRestrict="query=%22/properties/version/status%22=submitted";
+		}
+		
+		for (int t = 0; t <= tausend; t++) { // NOTE(review): the loop changes the status the query filters on; if each page re-evaluates the filter, later offsets may skip objects - confirm
+			int start = t * MAX_REC+1; // startRecord of page t: 1, MAX_REC+1, 2*MAX_REC+1, ...
+			// int max=Math.min((t+1)*1000, numberOfHits);
+			String query = "?maximumRecords="+String.valueOf(MAX_REC)+"&startRecord="
+					+ String.valueOf(start)+"&"+queryRestrict;
+			for (eSciDocXmlObject obj : connector
+					.getObjectListFromFilterResult(command+query, objectXPath)) {
 				
+				// TODO: is the following re-fetch really necessary? Currently the obj in the list is sometimes not the current one.
+				try{
+				HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
+				HttpEntity ent = resObj.getEntity();
+				if (ent!=null){
+				obj= new eSciDocXmlObject(ent.getContent()); // replace the list entry with a freshly fetched copy
+				} else {
+					logger.debug("Can not retrieve:" + obj.getESciDocId());
+					continue;
+				}
+				} catch (Exception e){ // NOTE(review): the exception itself is not logged, only the object ID
+					logger.debug("Can not retrieve:" + obj.getESciDocId());
+					continue;
+				}
+				if (mode==0 | mode==2){
+					HttpResponse res = connector.submitAnObject(obj,
+							"first release");
+					logger.debug(res.getStatusLine());
+				
+					if (res.getStatusLine().getStatusCode() != 200) {
+						logger.debug("Can not submit:" + obj.getESciDocId());
+						// res.getEntity().consumeContent(); // necessary to release
+						// the connection
+	
+					}
+					InputStream restream = res.getEntity().getContent(); // the response body is read and logged even when the submit failed
+					logger.debug(EScidocBasicHandler.convertStreamToString(restream)); // NOTE(review): restream is not closed here; presumably convertStreamToString drains it - confirm
+					//res.getEntity().consumeContent(); // necessary to release the
+														// connection
+	
+					if (!connector.upDateObject(obj)) {
+						logger.debug("Can not update:" + obj.getESciDocId());
+						// continue;
+	
+					}
+				}
+				
+				if (mode==1 | mode==2){ // in mode 2 this runs even if the submit or update above failed
+					HttpResponse res = connector.releaseAnObject(obj, "first release");
+					logger.debug(res.getStatusLine());
+					if (res.getStatusLine().getStatusCode() != 200) {
+						logger.debug("Can not release:" + obj.getESciDocId());
+						res.getEntity().consumeContent(); // necessary to release
+															// the connection
+						continue;
+					}
+					addedFile.debug("RELEASED:" + obj.getESciDocId());
+					res.getEntity().consumeContent(); // necessary to release the
+														// connection
+				}
 			}
-			
-			
-			res = connector.releaseAnObject(obj, "first release");
-			logger.debug(res.getStatusLine());
-			if (res.getStatusLine().getStatusCode()!=200){
-				logger.debug("Can not release:"+obj.getESciDocId());
-				res.getEntity().consumeContent(); // necessary to release the conneciton
-				continue;
-			}
-			addedFile.debug("RELEASED:"+obj.getESciDocId());
-			res.getEntity().consumeContent(); // necessary to release the conneciton
 		}
-			
+
 	}
-	public static void main(String[] args) throws Exception{
-		
+	/** Ad-hoc driver: configures log4j, connects to the eSciDoc test host and releases (mode 1) the members of context escidoc:1001; args is not used. */
+	public static void main(String[] args) throws Exception {
+
 		Logger rl = Logger.getRootLogger();
 		DOMConfigurator.configure("log4uconf.xml");
-        rl.setLevel(Level.DEBUG);
+		rl.setLevel(Level.DEBUG); // set after configure(), so it overrides the root level from the config file
 
-        
-		EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7");
-		ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf"));
-		ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter,
-				new ECHOTransformer(),connector,"/ir/context/escidoc:40001"); 
-		
-		//hv.readObjectsFromInstance("ECHO_collection");
-		//hv.readObjectsFromInstance("ECHO_resource");
-		
-		hv.releaseAndSubmitObjects("/ir/context/escidoc:40001/resources/members","//escidocItem:item");
+				
+		EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); // NOTE(review): host, user and password are hard-coded in source; load them from configuration instead
 		
 		
-//		newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001");
-		//hv.releaseAndSubmitObjects("/ir/containers","//container:container");
+		ECHOImporter newimporter = new ECHOImporter(new URL(
+				"file:///Users/dwinter/libcoll.rdf")); // developer-local file path
+		ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter,
+				new ECHOTransformer(), connector, "/ir/context/escidoc:1001");
+
+		// hv.readObjectsFromInstance("ECHO_collection");
+		// hv.readObjectsFromInstance("ECHO_resource");
+
+		hv.releaseAndSubmitObjects(
+				"/ir/context/escidoc:1001/resources/members",
+				"//escidocItem:item",1); // mode 1: only release
+
+		// newimporter.organizeRessourcesInCollections(connector,
+		// "/ir/context/escidoc:1001");
+		// hv.releaseAndSubmitObjects("/ir/containers","//container:container");
 	}
 }