Mercurial > hg > eSciDocImport
changeset 21:a3f9a9ecdd42 default tip
UTF-8 problem in AddConeIds solved
Neue Funktion zum Austauschen der Cone-IDs beim Editor
author | dwinter |
---|---|
date | Mon, 28 Oct 2013 14:53:43 +0100 |
parents | d1f63ee9998d |
children | |
files | src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceConeIdsInSource.java |
diffstat | 2 files changed, 96 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java Mon Oct 28 10:27:36 2013 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java Mon Oct 28 14:53:43 2013 +0100 @@ -1,22 +1,3 @@ - - - - - - - - - - - - - - - - - - - package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; // geht durch alle eintraege und tested auf personen ids @@ -25,11 +6,13 @@ import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.util.ArrayList; @@ -188,7 +171,7 @@ HashMap<String,List<List<String>>> newTerms = new HashMap<String, List<List<String>>>(); - String founds = readFileAsString("./tmp/found.csv"); + String founds = readFileAsString("/tmp/found.csv"); String[] splitted = founds.split("\n"); //ir/item/escidoc:647775,McLaughlin,Peter,McLaughlin, Peter,http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource/240 for (int i=0;i<splitted.length;i++){ @@ -236,7 +219,7 @@ Logger logger = Logger.getRootLogger(); logger.setLevel(Level.DEBUG); EScidocBasicHandler connector = new EScidocBasicHandler( - "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "XXXXX"); + "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "SSSS"); if(args.length<2){ System.out.println("Usage: startrecord maximumrecords"); @@ -287,7 +270,8 @@ private String readFileAsString(String filePath) throws IOException { StringBuffer fileData = new StringBuffer(); BufferedReader reader = new BufferedReader( - new FileReader(filePath)); + new InputStreamReader( + new FileInputStream(filePath), "UTF8")); char[] buf = new char[1024]; int numRead=0; while((numRead=reader.read(buf)) != -1){
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceConeIdsInSource.java Mon Oct 28 14:53:43 2013 +0100 @@ -0,0 +1,90 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class ReplaceConeIdsInSource { + + /** + * @param args + * @throws ESciDocXmlObjectException + * @throws JDOMException + * @throws IOException + * @throws IllegalStateException + */ + public static void main(String[] args) throws IllegalStateException, + IOException, JDOMException, ESciDocXmlObjectException { + + Logger logger = Logger.getRootLogger(); + logger.setLevel(Level.DEBUG); + EScidocBasicHandler connector = new EScidocBasicHandler( + "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "XXXX"); + + if(args.length<2){ + System.out.println("Usage: startrecord maximumrecords"); + System.exit(-1); + } + String MAX_REC = args[1]; + String start = args[0]; + String objectXPath = "//escidocItem:item"; + + String query = "?maximumRecords=" + String.valueOf(MAX_REC) + + "&startRecord=" + String.valueOf(start); + String command = "/ir/context/escidoc:38279/resources/members"; + for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( + command + query, objectXPath)) { + + Document doc = obj.getDocument(); + Boolean changed=false; + XPath idXPath = EScidocTools.getESciDocXpath("//source:source//dc:identifier"); + @SuppressWarnings("unchecked") + List<Element> identifiers = 
idXPath.selectNodes(doc); + for (Element identifier : identifiers) { + String idString = identifier.getTextTrim(); + if (!idString.startsWith("http://pubman.mpiwg-berlin.mpg.de/cone/editors/resource")) { + idString=idString.replace("http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource", + "http://pubman.mpiwg-berlin.mpg.de/cone/editors/resource"); + identifier.setText(idString); + changed=true; + + } + + } + if (changed){ + + Boolean retVal = connector.updateItem(obj); + System.out.println("Replaced:"+obj.getESciDocId()); + HttpResponse retValu = connector.submitAnObject(obj, "changed cone identifiers"); + + System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + System.out.println("Can not retrieve:" + obj.getESciDocId()); + continue; + } + + HttpResponse reValue2 = connector.releaseAnObject(obj, "changed cone identifiers"); + System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent())); + + } + } + + } +}