changeset 21:a3f9a9ecdd42 default tip

UTF-8 problem bei AddConeIds solved. Neue Funktion zum Austauschen der Cone-IDs beim Editor.
author dwinter
date Mon, 28 Oct 2013 14:53:43 +0100
parents d1f63ee9998d
children
files src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceConeIdsInSource.java
diffstat 2 files changed, 96 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java	Mon Oct 28 10:27:36 2013 +0100
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java	Mon Oct 28 14:53:43 2013 +0100
@@ -1,22 +1,3 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;
 
 // geht durch alle eintraege und tested auf personen ids
@@ -25,11 +6,13 @@
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
@@ -188,7 +171,7 @@
 		
 		HashMap<String,List<List<String>>> newTerms = new HashMap<String, List<List<String>>>();
 		
-		String founds = readFileAsString("./tmp/found.csv");
+		String founds = readFileAsString("/tmp/found.csv");
 		String[] splitted = founds.split("\n");
 		//ir/item/escidoc:647775,McLaughlin,Peter,McLaughlin, Peter,http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource/240
 		for (int i=0;i<splitted.length;i++){
@@ -236,7 +219,7 @@
 		Logger logger = Logger.getRootLogger();
 		logger.setLevel(Level.DEBUG);
 		EScidocBasicHandler connector = new EScidocBasicHandler(
-				"escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "XXXXX");
+				"escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "SSSS");
 
 		if(args.length<2){
 			System.out.println("Usage: startrecord maximumrecords");
@@ -287,7 +270,8 @@
 	private String readFileAsString(String filePath) throws IOException {
         StringBuffer fileData = new StringBuffer();
         BufferedReader reader = new BufferedReader(
-                new FileReader(filePath));
+        		 new InputStreamReader(
+                         new FileInputStream(filePath), "UTF8"));
         char[] buf = new char[1024];
         int numRead=0;
         while((numRead=reader.read(buf)) != -1){
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceConeIdsInSource.java	Mon Oct 28 14:53:43 2013 +0100
@@ -0,0 +1,90 @@
+package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.xpath.XPath;
+
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
+import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
+
+public class ReplaceConeIdsInSource {
+
+	/**
+	 * @param args
+	 * @throws ESciDocXmlObjectException
+	 * @throws JDOMException
+	 * @throws IOException
+	 * @throws IllegalStateException
+	 */
+	public static void main(String[] args) throws IllegalStateException,
+			IOException, JDOMException, ESciDocXmlObjectException {
+
+		Logger logger = Logger.getRootLogger();
+		logger.setLevel(Level.DEBUG);
+		EScidocBasicHandler connector = new EScidocBasicHandler(
+				"escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "XXXX");
+
+		if(args.length<2){
+			System.out.println("Usage: startrecord maximumrecords");
+			System.exit(-1);
+		}
+		String MAX_REC = args[1];
+		String start = args[0];
+		String objectXPath = "//escidocItem:item";
+
+		String query = "?maximumRecords=" + String.valueOf(MAX_REC)
+				+ "&startRecord=" + String.valueOf(start);
+		String command = "/ir/context/escidoc:38279/resources/members";
+		for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(
+				command + query, objectXPath)) {
+
+			Document doc = obj.getDocument();
+			Boolean changed=false;
+			XPath idXPath = EScidocTools.getESciDocXpath("//source:source//dc:identifier");
+			@SuppressWarnings("unchecked")
+			List<Element> identifiers = idXPath.selectNodes(doc);
+			for (Element identifier : identifiers) {
+				String idString = identifier.getTextTrim();
+				if (!idString.startsWith("http://pubman.mpiwg-berlin.mpg.de/cone/editors/resource")) {
+				idString=idString.replace("http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource",
+							"http://pubman.mpiwg-berlin.mpg.de/cone/editors/resource");
+					identifier.setText(idString);
+					changed=true;
+				
+				}
+			
+			}
+			if (changed){
+				
+				Boolean retVal = connector.updateItem(obj);
+				System.out.println("Replaced:"+obj.getESciDocId());
+				HttpResponse retValu = connector.submitAnObject(obj, "changed cone identifiers");
+				
+				System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent()));
+				HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
+				HttpEntity ent = resObj.getEntity();
+				if (ent!=null){
+				obj= new eSciDocXmlObject(ent.getContent());
+				} else {
+					System.out.println("Can not retrieve:" + obj.getESciDocId());
+					continue;
+				}
+				
+				HttpResponse reValue2 = connector.releaseAnObject(obj, "changed cone identifiers");
+				System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent()));
+				
+			}
+		}
+
+	}
+}