changeset 12:6e55be78bd57

Merge with ad5c0748bd08411e7d2a6b4186a77303716c9ebb
author dwinter
date Mon, 20 Aug 2012 12:59:13 +0200
parents ecd66e70cb65 (current diff) ad5c0748bd08 (diff)
children 9164f3f4b232
files
diffstat 4 files changed, 153 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceAffiliation_local.java	Mon Aug 20 12:59:13 2012 +0200
@@ -0,0 +1,101 @@
+package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.xpath.XPath;
+
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
+import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
+
+/** One-off tool: moves "Max Planck Society" person affiliations of context escidoc:38279 to escidoc:14002. */
+public class ReplaceAffiliation_local {
+
+	/**
+	 * Rewrites matching affiliations, then updates, submits and releases each changed item.
+	 *
+	 * @param args args[0] = startRecord, args[1] = maximumRecords of the member query
+	 * @throws ESciDocXmlObjectException if an eSciDoc helper call reports it
+	 * @throws JDOMException if an XPath helper call reports it
+	 * @throws IOException on I/O errors while talking to the repository
+	 * @throws IllegalStateException if reading an HTTP response entity reports it
+	 */
+	public static void main(String[] args) throws IllegalStateException,
+			IOException, JDOMException, ESciDocXmlObjectException {
+		// Check the arguments before the repository connection is set up.
+		if (args.length < 2) {
+			System.out.println("Usage: startrecord maximumrecords");
+			System.exit(-1);
+		}
+		String start = args[0];
+		String maxRecords = args[1];
+
+		Logger logger = Logger.getRootLogger();
+		logger.setLevel(Level.DEBUG);
+		// NOTE(review): credentials are hard-coded in source; move them to configuration.
+		EScidocBasicHandler connector = new EScidocBasicHandler(
+				"escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian");
+
+		String objectXPath = "//escidocItem:item";
+		String query = "?maximumRecords=" + maxRecords + "&startRecord=" + start;
+		String command = "/ir/context/escidoc:38279/resources/members";
+
+		// The XPath expressions do not depend on the item, so build them once.
+		XPath personXPath = EScidocTools.getESciDocXpath("//eterms:creator/person:person");
+		XPath organizationXPath = EScidocTools.getESciDocXpath("./organization:organization");
+		XPath titleXPath = EScidocTools.getESciDocXpath("./dc:title");
+		XPath identifierXPath = EScidocTools.getESciDocXpath("./dc:identifier");
+
+		for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(
+				command + query, objectXPath)) {
+			boolean changed = false;
+			@SuppressWarnings("unchecked")
+			List<Element> persons = personXPath.selectNodes(obj.getDocument());
+			for (Element person : persons) {
+				Element organization = (Element) organizationXPath.selectSingleNode(person);
+				if (organization == null) {
+					continue;
+				}
+				Element title = (Element) titleXPath.selectSingleNode(organization);
+				Element identifier = (Element) identifierXPath.selectSingleNode(organization);
+				// Title and identifier are changed together; organizations lacking either are skipped (no NPE).
+				if (title != null && identifier != null
+						&& title.getTextTrim().startsWith("Max Planck Society")) {
+					title.setText("Max Planck Institute for the History of Science");
+					identifier.setText("escidoc:14002");
+					changed = true;
+				}
+			}
+			if (!changed) {
+				continue;
+			}
+			// Presumably false means the update failed (createItem is used that way elsewhere) - TODO confirm.
+			if (!Boolean.TRUE.equals(connector.updateItem(obj))) {
+				System.out.println("Can not update:" + obj.getESciDocId());
+				continue;
+			}
+			System.out.println("Replaced:" + obj.getESciDocId());
+			HttpResponse submitted = connector.submitAnObject(obj, "changed affiliation of persons");
+			System.out.println(EScidocBasicHandler.convertStreamToString(submitted.getEntity().getContent()));
+
+			// Fetch the item again and release the freshly fetched copy.
+			HttpEntity ent = connector.eScidocGet(obj.getESciDocId()).getEntity();
+			if (ent == null) {
+				System.out.println("Can not retrieve:" + obj.getESciDocId());
+				continue;
+			}
+			obj = new eSciDocXmlObject(ent.getContent());
+			HttpResponse released = connector.releaseAnObject(obj, "changed affiliation of persons");
+			System.out.println(EScidocBasicHandler.convertStreamToString(released.getEntity().getContent()));
+		}
+	}
+}
--- a/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java	Thu Jul 26 11:24:03 2012 +0200
+++ b/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java	Mon Aug 20 12:59:13 2012 +0200
@@ -24,6 +24,7 @@
 import java.util.ArrayList;
 import java.util.Enumeration;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Stack;
 import java.util.Vector;
 
@@ -51,14 +52,14 @@
 	private ArrayList<String>filter; //Array of paths which shouldn'T be indexed
 
 	public IndexMetaIterator(File rootFolder) throws IOException{
-		
+
 		filter = new ArrayList<String>();
 		filter.add("/mpiwg/online/permanent/SudanRockArt"); // TODO: make this configurable
-		
+
 		this.rootFolder=rootFolder;
 		this.currentFolder=rootFolder;
 		this.stack = new Stack<String>();
-		
+
 		for (String f:rootFolder.list()){
 			String fn = rootFolder.getCanonicalPath()+"/"+f;
 			if (!filter.contains(fn)){
@@ -82,39 +83,42 @@
 		String nextFile = stack.pop();
 		while(!nextFile.endsWith(".meta") && !stack.isEmpty()){
 			System.out.println("CHECK_________"+nextFile);
-			
-		
+
+
 			if(!nextFile.endsWith("pageimg") & !nextFile.endsWith("pages") & !nextFile.endsWith("pagesHi") & !nextFile.endsWith("pagesLo") 
 					& !nextFile.endsWith("pageimg")){ //skip pageimg
-				
+
 				File nf = new File(nextFile);
-				
+
 				if(nf.isDirectory()){
-					for (String f:nf.list()){
-						String fn;
-						try {
-							if (!f.startsWith(".")){
-							fn = nf.getCanonicalPath()+"/"+f;
-							if (!filter.contains(fn)){
-								if (!f.equals("")) {// FIXME some filesystems (sshfs?) gives empty filenames if the path contains special characters.
-									stack.push(fn);}
-									else {
-										Logger.getLogger("notAddedFilesLogger").info("Folder -" +fn+" contains files with characters I cannot read!" );
+					String[] filesInDirectory=nf.list();
+					if (filesInDirectory != null){
+						for (String f:filesInDirectory){
+							String fn;
+							try {
+								if (!f.startsWith(".")){
+									fn = nf.getCanonicalPath()+"/"+f;
+									if (!filter.contains(fn)){
+										if (!f.equals("")) {// FIXME some filesystems (sshfs?) gives empty filenames if the path contains special characters.
+											stack.push(fn);}
+										else {
+											Logger.getLogger("notAddedFilesLogger").info("Folder -" +fn+" contains files with characters I cannot read!" );
+										}
+
 									}
-							
-							}
+								}
+							} catch (IOException e) {
+								// TODO Auto-generated catch block
+								e.printStackTrace();
 							}
-						} catch (IOException e) {
-							// TODO Auto-generated catch block
-							e.printStackTrace();
+
 						}
-						
 					}
 				}
 			}
-			
+
 			nextFile = stack.pop();
-			
+
 		}
 		if (!nextFile.endsWith(".meta")) //der letzte Eintrag muss noch gretrennt getestet werden.
 			nextFile = null;
@@ -136,15 +140,15 @@
 	}
 
 	private ECHOObject createECHOObject(File nextFile) throws Exception {
-		
+
 		//Document doc = new SAXBuilder().build(nextFile);
 		try{
-		FoxridgeRessource er = new FoxridgeRessource(nextFile.getParentFile().getName(),nextFile.getParentFile().getAbsolutePath(),null);
-		
-		er.metaData = er.correctML(nextFile.getAbsolutePath());
-		//er.pid=er.getPid(); //TODO: not needed any more?
-		er.echoUrl=er.metaData; //TODO find a better solution, what to present here, z.b. texttool-tag auswerten.
-		return er;
+			FoxridgeRessource er = new FoxridgeRessource(nextFile.getParentFile().getName(),nextFile.getParentFile().getAbsolutePath(),null);
+
+			er.metaData = er.correctML(nextFile.getAbsolutePath());
+			//er.pid=er.getPid(); //TODO: not needed any more?
+			er.echoUrl=er.metaData; //TODO find a better solution, what to present here, z.b. texttool-tag auswerten.
+			return er;
 		} catch  (Exception e) {
 			// TODO Auto-generated catch block
 			e.printStackTrace();
@@ -154,12 +158,12 @@
 	@Override
 	public void remove() {
 		// TODO Auto-generated method stub
-		
+
 	}
 
- 
+
 
 }
 
 
-            
+
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java	Thu Jul 26 11:24:03 2012 +0200
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java	Mon Aug 20 12:59:13 2012 +0200
@@ -240,9 +240,10 @@
 					// TODO write PID to back to echo-obj
 					Boolean result = connector.createItem(escidocItem);
 					if (result) {
-						addedObjects.add(escidocItem.getESciDocId());
-						addedFile.debug(escidocItem.getESciDocId() + "\n");
-
+						String objID=escidocItem.getESciDocId();
+						addedObjects.add(objID);
+						addedFile.debug(objID + "\n");
+						((ECHORessource)obj).writeEsciDocIDToIndexMeta(objID.replace("/ir/item/",""));
 					} else {
 						notAddedObjects.add(obj.echoUrl);
 						notAddedFile.debug(obj.echoUrl);
@@ -344,7 +345,7 @@
 		Integer numberOfHits = connector.getNumberOfHitsFromFilterResult(
 				command, objectXPath,mode);
 
-
+		logger.info(String.format("%s objects found.", numberOfHits));
 		int tausend = ((numberOfHits-1) / MAX_REC);
 
 		String queryRestrict="";
@@ -353,7 +354,7 @@
 		} else {
 			queryRestrict="query=%22/properties/version/status%22=submitted";
 		}
-
+		int counter=0;
 		for (int t = 0; t <= tausend; t++) {
 			int start = t * MAX_REC+1;
 			// int max=Math.min((t+1)*1000, numberOfHits);
@@ -362,6 +363,9 @@
 			for (eSciDocXmlObject obj : connector
 					.getObjectsFromFilterResult(command+query, objectXPath)) {
 
+				logger.info(String.format("%s/%s", counter,numberOfHits));
+				counter+=1;
+				
 				//TODO is the following really necessary, currently the obj in the list is sometimes not the current one.
 				try{
 					HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
@@ -436,7 +440,7 @@
 				new ECHOTransformer(), connector, "/ir/context/escidoc:1001");
 
 		// hv.readObjectsFromInstance("ECHO_collection");
-		hv.readObjectsFromInstance("ECHO_resource");
+		//hv.readObjectsFromInstance("ECHO_resource");
 
 		hv.releaseAndSubmitObjects(
 				"/ir/context/escidoc:1001/resources/members",
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java	Thu Jul 26 11:24:03 2012 +0200
+++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java	Mon Aug 20 12:59:13 2012 +0200
@@ -29,7 +29,7 @@
 		Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG);
 		Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG);
 		
-        rl.setLevel(Level.DEBUG);
+        rl.setLevel(Level.INFO);
 
         if (args.length<4){
         	System.out.println("Usage: harvest username password path context.");
@@ -58,10 +58,10 @@
 		
 		
 		//hv.readObjectsFromInstance("ECHO_collection");
-		hv.readObjectsFromInstance("ECHO_resource");
+		//hv.readObjectsFromInstance("ECHO_resource");
 		
 		//hv.releaseAndSubmitObjects("/ir/context/escidoc:12001");
-		hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item","added esidoc test id",0);
+		//hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item","added esidoc test id",0);
 		hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item","added esidoc test id",1);