changeset 2:fab8e78184fa

minor
author dwinter
date Mon, 10 Jan 2011 12:42:27 +0100
parents 6b0267cb40ed
children 58b52df9763c
files src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java
diffstat 5 files changed, 223 insertions(+), 112 deletions(-) [+]
line wrap: on
line diff
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java	Fri Nov 26 09:09:25 2010 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java	Mon Jan 10 12:42:27 2011 +0100
@@ -236,7 +236,7 @@
 		    }
 		    }
 		    
-		    logger.debug("executing request:"+httpBase.getRequestLine());
+		    //logger.debug("executing request:"+httpBase.getRequestLine());
 		    
 		   
 		    HttpResponse status = httpclient.execute(httpBase);
@@ -510,7 +510,7 @@
 
 	public HttpResponse submitAnObject(eSciDocXmlObject obj, String comment) throws ClientProtocolException, IOException, JDOMException {
 		try {
-		addVersionPid(obj);
+		//addVersionPid(obj);
 		} catch (Exception e) {
 			// TODO: handle exception
 		}
@@ -540,9 +540,13 @@
 		//String command = context+"/resources/members";
 		
 		HttpResponse result =eScidocGet(command);
+		//InputStream text=result.getEntity().getContent();
+		//String tmtxt = convertStreamToString(text);
+		//System.out.println(tmtxt);
 		Document dom = new SAXBuilder().build(result.getEntity().getContent());
 		XPath xp = EScidocTools.getESciDocXpath(objectXpath);
 		
+		
 		List<Element> attr = (List<Element>)xp.selectNodes(dom);
 		ArrayList<eSciDocXmlObject> ret = new ArrayList<eSciDocXmlObject>(); 
 		for (Element el: attr){
@@ -551,6 +555,28 @@
 		return ret; 
 	}
 	
+	/**
+	 * Asks the filter for a single record and returns the total it reports in srw:numberOfRecords.
+	 * @param command filter path; "?maximumRecords=1&" plus a version-status query is appended
+	 * @param objectXPath not used by this method
+	 * @param mode 0 or 2 counts objects in status pending, any other value counts submitted
+	 * @return the reported number of records
+	 * @throws JDOMException if the response is not parseable or has no srw:numberOfRecords element
+	 */
+	public Integer getNumberOfHitsFromFilterResult(String command,
+			String objectXPath, int mode) throws IOException, IllegalStateException, JDOMException {
+		String status = (mode == 0 || mode == 2) ? "pending" : "submitted";
+		String query = "query=%22/properties/version/status%22=" + status;
+		HttpResponse result = eScidocGet(command + "?maximumRecords=1&" + query);
+		Document dom = new SAXBuilder().build(result.getEntity().getContent());
+		XPath xp = EScidocTools.getESciDocXpath("//srw:numberOfRecords");
+		Element hits = (Element) xp.selectSingleNode(dom);
+		if (hits == null) {
+			throw new JDOMException("no srw:numberOfRecords element in response to " + command);
+		}
+		return Integer.valueOf(hits.getTextTrim());
+	}
+	
 	public boolean addVersionPid(eSciDocXmlObject obj) throws ClientProtocolException, IOException, JDOMException{
 		//HttpResponse ret = eScidocGet(href);
 		//String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent());
@@ -571,8 +597,11 @@
 		
 		int code = result.getStatusLine().getStatusCode();
 		result.getEntity().consumeContent();
-		if (code!=200)
+		if (code!=200){
+			if(code==450)
+				return false;
 			return false;
+		}
 		return true;
 	
 		
@@ -679,4 +708,8 @@
 		fw.close();
 		return ret;
 	}
+
+
+
+
 }
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java	Fri Nov 26 09:09:25 2010 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java	Mon Jan 10 12:42:27 2011 +0100
@@ -45,7 +45,6 @@
 	
 	public static XPath getESciDocXpath(String xpath) throws JDOMException{
 		XPath xpathResources = XPath.newInstance(xpath);
-		
 		xpathResources.addNamespace("prop",prop);
 		xpathResources.addNamespace("mpiwg",MPIWG);
 		
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java	Fri Nov 26 09:09:25 2010 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java	Mon Jan 10 12:42:27 2011 +0100
@@ -3,11 +3,12 @@
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
 
-
+import org.apache.http.HttpEntity;
 import org.apache.http.HttpResponse;
 import org.apache.log4j.BasicConfigurator;
 import org.apache.log4j.Level;
@@ -26,7 +27,8 @@
 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
 
 public class ESciDocDataHarvester {
-	
+	static int MAX_REC=1000;
+	//static int MAX_REC=5;
 	protected Logger logger = Logger.getRootLogger();
 	protected Importer importer;
 	protected EScidocBasicHandler connector;
@@ -35,134 +37,197 @@
 	private String echoContext;
 	private Logger addedFile = Logger.getLogger("addedFilesLogger");
 	private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");
-	
-	
-	public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{
-		this.importer=importer;
-		this.transformer=transformer;
-		this.connector=connector;
-		this.tools=new EScidocTools(connector);
-		this.echoContext= context;
-	
-		
+
+	public ESciDocDataHarvester(Importer importer, Transformer transformer,
+			EScidocBasicHandler connector, String context) throws IOException {
+		this.importer = importer;
+		this.transformer = transformer;
+		this.connector = connector;
+		this.tools = new EScidocTools(connector);
+		this.echoContext = context;
+
 	}
-	public Boolean readObjectsFromInstance(String type) throws Exception{
+
+	public Boolean readObjectsFromInstance(String type) throws Exception {
 		ArrayList<String> addedObjects = new ArrayList<String>();
 		ArrayList<String> notAddedObjects = new ArrayList<String>();
-		for (ECHOObject obj: importer.getObjectList(type)){
-			
-			
-			if (ECHORessource.class.isInstance(obj)){
-			try {
-				if (connector.alreadyExists("/md-records/md-record/admin/archivePath",((ECHORessource)obj).archivePath,echoContext)){
-					logger.debug("already exist:"+((ECHORessource)obj).archivePath);
+		for (ECHOObject obj : importer.getObjectList(type)) {
+
+			if (ECHORessource.class.isInstance(obj)) {
+				try {
+					if (connector.alreadyExists(
+							"/md-records/md-record/admin/archivePath",
+							((ECHORessource) obj).archivePath, echoContext)) {
+						logger.debug("already exist:"
+								+ ((ECHORessource) obj).archivePath);
+						continue;
+					}
+				} catch (Exception e) {
+					logger.debug("already exist error");
+					e.printStackTrace();
 					continue;
 				}
-			} catch (Exception e) {
-				logger.debug("already exist error");
-				e.printStackTrace();
-				continue;
 			}
-			}
-			
-			obj.context=echoContext;
-			
-			String contid=connector.getIDfromPID(obj.pid,echoContext);
-			if (contid!=null){
-				System.out.println("------- belongsTo:"+contid);
+
+			obj.context = echoContext;
+
+			String contid = connector.getIDfromPID(obj.pid, echoContext);
+			if (contid != null) {
+				System.out.println("------- belongsTo:" + contid);
 			} else {
-			
-			eSciDocXmlObject escidocItem = transformer.transform(obj);
-			logger.info(escidocItem.printXML());
-			// TODO write PID to back to echo-obj
-			Boolean result = connector.createItem(escidocItem);
-			if (result){
-				addedObjects.add(escidocItem.getESciDocId());
-				addedFile.debug(escidocItem.getESciDocId()+"\n");
-				//addedFile.write(escidocItem.getESciDocId()+"\n");
-				//addedFile.flush();
-			
-			}else {
-				notAddedObjects.add(obj.echoUrl);
-				notAddedFile.debug(obj.echoUrl);
-				//notAddedFile.write(obj.echoUrl+"\n");
-				//notAddedFile.flush();
-			}
-			//if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){
-			//	logger.info("PID already exists:"+obj); 
-			//} else if (result == ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){
-			//	logger.info("Object with reference to the same digital object already exists:"+obj);
-			//}
-			
+
+				eSciDocXmlObject escidocItem = transformer.transform(obj);
+				logger.info(escidocItem.printXML());
+				// TODO write PID to back to echo-obj
+				Boolean result = connector.createItem(escidocItem);
+				if (result) {
+					addedObjects.add(escidocItem.getESciDocId());
+					addedFile.debug(escidocItem.getESciDocId() + "\n");
+					// addedFile.write(escidocItem.getESciDocId()+"\n");
+					// addedFile.flush();
+
+				} else {
+					notAddedObjects.add(obj.echoUrl);
+					notAddedFile.debug(obj.echoUrl);
+					// notAddedFile.write(obj.echoUrl+"\n");
+					// notAddedFile.flush();
+				}
+				// if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){
+				// logger.info("PID already exists:"+obj);
+				// } else if (result ==
+				// ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){
+				// logger.info("Object with reference to the same digital object already exists:"+obj);
+				// }
+
 			}
 		}
-		if(logger.getLevel()==Level.DEBUG){
-			for (String addedObject:addedObjects){
+		if (logger.getLevel() == Level.DEBUG) {
+			for (String addedObject : addedObjects) {
 				logger.debug(addedObject);
 			}
 		}
-		
-//		File outFile = new File("/tmp/import.out");
-//		FileWriter fw = new FileWriter(outFile);
-//		for (String addedObject:addedObjects){
-//			fw.write(addedObject+"\n");
-//		}
-//		for (String addedObject:notAddedObjects){
-//			fw.write(addedObject+"\n");
-//		}
-//		fw.close();
+
+		// File outFile = new File("/tmp/import.out");
+		// FileWriter fw = new FileWriter(outFile);
+		// for (String addedObject:addedObjects){
+		// fw.write(addedObject+"\n");
+		// }
+		// for (String addedObject:notAddedObjects){
+		// fw.write(addedObject+"\n");
+		// }
+		// fw.close();
 		return true;
 	}
 
-	public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{
-		for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){
-			HttpResponse res = connector.submitAnObject(obj,"first release");
-			logger.debug(res.getStatusLine());
-			if (res.getStatusLine().getStatusCode()!=200){
-				logger.debug("Can not submit:"+obj.getESciDocId());
-				//res.getEntity().consumeContent(); // necessary to release the conneciton
-			
-			}
-			res.getEntity().consumeContent(); // necessary to release the conneciton
-			
-			if (!connector.upDateObject(obj)){
-				logger.debug("Can not update:"+obj.getESciDocId());
-				//continue;
+	/**
+	 * @param command
+	 * @param objectXPath
+	 * @param mode 0 : only submit, 1:only release, 2:release and submit
+	 * @throws Exception
+	 */
+	public void releaseAndSubmitObjects(String command, String objectXPath,int mode)
+			throws Exception {
+
+		Integer numberOfHits = connector.getNumberOfHitsFromFilterResult(
+				command, objectXPath,mode);
+		
+		
+		int tausend = ((numberOfHits-1) / MAX_REC);
+		
+		String queryRestrict="";
+		if(mode==0 | mode==2){
+			queryRestrict="query=%22/properties/version/status%22=pending";
+		} else {
+			queryRestrict="query=%22/properties/version/status%22=submitted";
+		}
+		
+		for (int t = 0; t <= tausend; t++) {
+			int start = t * MAX_REC+1;
+			// int max=Math.min((t+1)*1000, numberOfHits);
+			String query = "?maximumRecords="+String.valueOf(MAX_REC)+"&startRecord="
+					+ String.valueOf(start)+"&"+queryRestrict;
+			for (eSciDocXmlObject obj : connector
+					.getObjectListFromFilterResult(command+query, objectXPath)) {
 				
+				//TODO is the following really necessary, currently the obj in the list is sometimes not the current one.
+				try{
+				HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
+				HttpEntity ent = resObj.getEntity();
+				if (ent!=null){
+				obj= new eSciDocXmlObject(ent.getContent());
+				} else {
+					logger.debug("Can not retrieve:" + obj.getESciDocId());
+					continue;
+				}
+				} catch (Exception e){
+					logger.debug("Can not retrieve:" + obj.getESciDocId());
+					continue;
+				}
+				if (mode==0 | mode==2){
+					HttpResponse res = connector.submitAnObject(obj,
+							"first release");
+					logger.debug(res.getStatusLine());
+				
+					if (res.getStatusLine().getStatusCode() != 200) {
+						logger.debug("Can not submit:" + obj.getESciDocId());
+						// res.getEntity().consumeContent(); // necessary to release
+						// the conneciton
+	
+					}
+					InputStream restream = res.getEntity().getContent();
+					logger.debug(EScidocBasicHandler.convertStreamToString(restream));
+					//res.getEntity().consumeContent(); // necessary to release the
+														// conneciton
+	
+					if (!connector.upDateObject(obj)) {
+						logger.debug("Can not update:" + obj.getESciDocId());
+						// continue;
+	
+					}
+				}
+				
+				if (mode==1 | mode==2){
+					HttpResponse res = connector.releaseAnObject(obj, "first release");
+					logger.debug(res.getStatusLine());
+					if (res.getStatusLine().getStatusCode() != 200) {
+						logger.debug("Can not release:" + obj.getESciDocId());
+						res.getEntity().consumeContent(); // necessary to release
+															// the conneciton
+						continue;
+					}
+					addedFile.debug("RELEASED:" + obj.getESciDocId());
+					res.getEntity().consumeContent(); // necessary to release the
+														// connecito
+				}
 			}
-			
-			
-			res = connector.releaseAnObject(obj, "first release");
-			logger.debug(res.getStatusLine());
-			if (res.getStatusLine().getStatusCode()!=200){
-				logger.debug("Can not release:"+obj.getESciDocId());
-				res.getEntity().consumeContent(); // necessary to release the conneciton
-				continue;
-			}
-			addedFile.debug("RELEASED:"+obj.getESciDocId());
-			res.getEntity().consumeContent(); // necessary to release the conneciton
 		}
-			
+
 	}
-	public static void main(String[] args) throws Exception{
-		
+
+	public static void main(String[] args) throws Exception {
+
 		Logger rl = Logger.getRootLogger();
 		DOMConfigurator.configure("log4uconf.xml");
-        rl.setLevel(Level.DEBUG);
+		rl.setLevel(Level.DEBUG);
 
-        
-		EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7");
-		ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf"));
-		ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter,
-				new ECHOTransformer(),connector,"/ir/context/escidoc:40001"); 
-		
-		//hv.readObjectsFromInstance("ECHO_collection");
-		//hv.readObjectsFromInstance("ECHO_resource");
-		
-		hv.releaseAndSubmitObjects("/ir/context/escidoc:40001/resources/members","//escidocItem:item");
+				
+		EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7");
 		
 		
-//		newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001");
-		//hv.releaseAndSubmitObjects("/ir/containers","//container:container");
+		ECHOImporter newimporter = new ECHOImporter(new URL(
+				"file:///Users/dwinter/libcoll.rdf"));
+		ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter,
+				new ECHOTransformer(), connector, "/ir/context/escidoc:1001");
+
+		// hv.readObjectsFromInstance("ECHO_collection");
+		// hv.readObjectsFromInstance("ECHO_resource");
+
+		hv.releaseAndSubmitObjects(
+				"/ir/context/escidoc:1001/resources/members",
+				"//escidocItem:item",1);
+
+		// newimporter.organizeRessourcesInCollections(connector,
+		// "/ir/context/escidoc:1001");
+		// hv.releaseAndSubmitObjects("/ir/containers","//container:container");
 	}
 }
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java	Fri Nov 26 09:09:25 2010 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java	Mon Jan 10 12:42:27 2011 +0100
@@ -41,9 +41,10 @@
 				new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); 
 		
 		//hv.readObjectsFromInstance("ECHO_collection");
-		hv.readObjectsFromInstance("ECHO_resource");
+		//hv.readObjectsFromInstance("ECHO_resource");
 		
 		//hv.releaseAndSubmitObjects("/ir/context/escidoc:12001");
+		hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1);
 		
 
 	}
--- a/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java	Fri Nov 26 09:09:25 2010 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java	Mon Jan 10 12:42:27 2011 +0100
@@ -85,6 +85,19 @@
 			this.pid= test.getTextTrim();
 }
 
+	/**
+	 * Parses an eSciDoc XML document from a stream and reads the pid from its first
+	 * prop:pid element; pid is left unset (no NullPointerException) if there is none.
+	 */
+	public eSciDocXmlObject(InputStream content) throws JDOMException, IOException {
+		dom = new SAXBuilder().build(content);
+		xpath = EScidocTools.getESciDocXpath("//prop:pid");
+		Element pidElement = (Element) xpath.selectSingleNode(dom);
+		if (pidElement != null) {
+			this.pid = pidElement.getTextTrim();
+		}
+	}
+
 	public void insertDC(Element mdDc) throws Exception{
 		//NodeList dcList = mdDc.getFirstChild().getChildNodes();
 		XPath dcXPath = EScidocTools.getESciDocXpath("//dc-md"); //todo check path to dc in escddoc templates