Mercurial > hg > eSciDocImport
view src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java @ 19:671c6e3449f7
add coneids added
author | dwinter |
---|---|
date | Mon, 27 May 2013 15:06:53 +0200 |
parents | |
children | d1f63ee9998d |
line wrap: on
line source
package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;

// Walks through all entries and checks them for person ids;
// where necessary the ids are added (call the python tool addconeids for that).

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.xpath.XPath;

import com.sun.xml.internal.xsom.impl.scd.Iterators.Map;

import sun.security.krb5.internal.crypto.Nonce;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Lists the persons of eSciDoc items together with their identifiers in a CSV
 * file and adds CoNE identifiers, taken from a lookup file, to persons that
 * have no identifier yet.
 */
public class AddConeIds {

    /** One CSV row: item id, family name, given name, complete name, identifier. */
    private static final String CSV_ROW_FORMAT = "%s,%s,%s,%s,%s\n";

    /** Comment passed to the connector when submitting and releasing a changed item. */
    private static final String CHANGE_COMMENT = "changed cone identifiers";

    /**
     * Writes one CSV row per person identifier of the given item (or one row
     * with an empty identifier column for a person without identifiers) and
     * adds CoNE identifiers from {@code newTerms} to persons that have none.
     *
     * <p>All identifiers are added to the item's document first; the item is
     * then updated, submitted, re-fetched and released once. Doing this per
     * added identifier would re-fetch the item in between, after which the
     * person elements collected here no longer belong to the object that is
     * sent to the server.
     *
     * @param obj the item whose persons are inspected
     * @param out writer receiving the CSV rows; flushed after every person
     * @param newTerms lookup from item id (as returned by
     *        {@code obj.getESciDocId()}) to rows of the form
     *        {@code [familyName, givenName, ..., coneId]}; may be {@code null},
     *        in which case nothing is added
     * @param connector handler used to update, submit, re-fetch and release the item
     * @throws JDOMException if an XPath expression cannot be evaluated
     * @throws IOException if writing the CSV or reading a server response fails
     * @throws IllegalStateException propagated from reading a response entity
     * @throws ESciDocXmlObjectException propagated from the eSciDoc object handling
     */
    public void findPersons(eSciDocXmlObject obj, BufferedWriter out,
            HashMap<String, List<List<String>>> newTerms, EScidocBasicHandler connector)
            throws JDOMException, IOException, IllegalStateException, ESciDocXmlObjectException {

        Document doc = obj.getDocument();
        String itemId = obj.getESciDocId();
        boolean changed = false;

        XPath personXPath = EScidocTools.getESciDocXpath("//person:person");
        @SuppressWarnings("unchecked")
        List<Element> persons = personXPath.selectNodes(doc);

        XPath familyNameXpath = EScidocTools.getESciDocXpath(".//eterms:family-name");
        XPath givenNameXpath = EScidocTools.getESciDocXpath(".//eterms:given-name");
        XPath completeNameXpath = EScidocTools.getESciDocXpath(".//eterms:complete-name");
        XPath identifierXpath = EScidocTools.getESciDocXpath("./dc:identifier");

        // New CoNE entries known for this item, or null if there are none.
        List<List<String>> candidates = (newTerms == null) ? null : newTerms.get(itemId);

        for (Element person : persons) {
            String famName = trimmedText(familyNameXpath, person);
            String givName = trimmedText(givenNameXpath, person);
            String complName = trimmedText(completeNameXpath, person);

            @SuppressWarnings("unchecked")
            List<Element> ids = identifierXpath.selectNodes(person);

            if (ids.isEmpty()) {
                if (candidates != null) {
                    for (String coneId : matchingConeIds(candidates, famName, givName)) {
                        person.addContent(newConeIdentifier(coneId));
                        changed = true;
                    }
                }
                // The row reflects the state before the change: no identifier.
                out.write(String.format(CSV_ROW_FORMAT, itemId, famName, givName, complName, ""));
            } else {
                for (Element id : ids) {
                    out.write(String.format(CSV_ROW_FORMAT, itemId, famName, givName, complName,
                            id.getTextTrim()));
                }
            }
            out.flush();
        }

        if (changed) {
            publishChanges(obj, connector);
        }
    }

    /** Returns the trimmed text of the first node matched by {@code xpath}, or "" if none matches. */
    private static String trimmedText(XPath xpath, Element context) throws JDOMException {
        Element match = (Element) xpath.selectSingleNode(context);
        return (match == null) ? "" : match.getTextTrim();
    }

    /** Collects the last field of every row whose first two fields equal the given names. */
    private static List<String> matchingConeIds(List<List<String>> candidates, String famName,
            String givName) {
        List<String> coneIds = new ArrayList<String>();
        for (List<String> entry : candidates) {
            // A usable row has family name, given name and at least one more
            // field; shorter rows would yield a name as the id or fail on get().
            if (entry.size() < 3) {
                continue;
            }
            if (entry.get(0).equals(famName) && entry.get(1).equals(givName)) {
                coneIds.add(entry.get(entry.size() - 1));
            }
        }
        return coneIds;
    }

    /** Builds a dc:identifier element with xsi:type "eterms:CONE" holding the given id. */
    private static Element newConeIdentifier(String coneId) {
        Element newIdent = new Element("identifier", EScidocTools.DC);
        Namespace ns = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
        newIdent.setAttribute("type", "eterms:CONE", ns);
        newIdent.setText(coneId);
        return newIdent;
    }

    /** Updates and submits the changed item, re-fetches it and releases the re-fetched version. */
    private void publishChanges(eSciDocXmlObject obj, EScidocBasicHandler connector)
            throws JDOMException, IOException, IllegalStateException, ESciDocXmlObjectException {
        connector.updateItem(obj);
        printResponse(connector.submitAnObject(obj, CHANGE_COMMENT));

        // The release is done on a freshly fetched copy of the item.
        HttpResponse fetched = connector.eScidocGet(obj.getESciDocId());
        HttpEntity entity = fetched.getEntity();
        if (entity == null) {
            System.out.println("Can not retrieve:" + obj.getESciDocId());
            return;
        }
        eSciDocXmlObject current = new eSciDocXmlObject(entity.getContent());
        printResponse(connector.releaseAnObject(current, CHANGE_COMMENT));
    }

    /** Prints the body of a server response to stdout; a response without entity prints a notice. */
    private static void printResponse(HttpResponse response)
            throws JDOMException, IOException, IllegalStateException, ESciDocXmlObjectException {
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            System.out.println("No response body");
            return;
        }
        System.out.println(EScidocBasicHandler.convertStreamToString(entity.getContent()));
    }

    /**
     * Reads {@code ./tmp/found.csv} and groups its rows by their first field.
     *
     * <p>Example row:
     * {@code /ir/item/escidoc:647775,McLaughlin,Peter,McLaughlin, Peter,http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource/240}
     * Fields are split on every comma (no quoting), so a complete name that
     * contains a comma occupies two fields; consumers therefore take the last
     * field as the identifier.
     *
     * <p>NOTE(review): the path is relative ({@code ./tmp}) while the output of
     * {@link #findAllPersons(String[])} goes to {@code /tmp} - confirm this is intended.
     *
     * @return map from first field to the list of remaining fields of each row
     * @throws IOException if the file cannot be read
     */
    public HashMap<String, List<List<String>>> readEscidocToIdentifier() throws IOException {
        HashMap<String, List<List<String>>> newTerms = new HashMap<String, List<List<String>>>();
        String founds = readFileAsString("./tmp/found.csv");

        // "\r?\n" keeps a Windows line ending from sticking to the last field.
        for (String line : founds.split("\r?\n")) {
            if (line.trim().length() == 0) {
                continue; // blank line, e.g. at the end of the file
            }
            String[] fields = line.split(",");
            if (fields.length == 0) {
                continue; // line consisted of commas only
            }

            List<List<String>> content = newTerms.get(fields[0]);
            if (content == null) {
                content = new ArrayList<List<String>>();
                newTerms.put(fields[0], content);
            }

            List<String> entry = new ArrayList<String>(fields.length);
            for (int j = 1; j < fields.length; j++) {
                entry.add(fields[j]);
            }
            content.add(entry);
        }
        return newTerms;
    }

    /**
     * Fetches a range of items from the context and runs
     * {@link #findPersons} on each, writing the CSV to {@code /tmp/ids.csv}.
     *
     * @param args {@code args[0]} is the start record, {@code args[1]} the
     *        maximum number of records; with fewer arguments a usage message
     *        is printed and the JVM exits with status -1
     * @throws IOException if the CSV or the lookup file cannot be accessed
     * @throws JDOMException propagated from {@link #findPersons}
     * @throws IllegalStateException propagated from {@link #findPersons}
     * @throws ESciDocXmlObjectException propagated from {@link #findPersons}
     */
    public void findAllPersons(String[] args) throws IllegalStateException, IOException,
            JDOMException, ESciDocXmlObjectException {
        // Validate first so that a bad invocation does not truncate the output file.
        if (args.length < 2) {
            System.out.println("Usage: startrecord maximumrecords");
            System.exit(-1);
        }
        String start = args[0];
        String maxRecords = args[1];

        Logger logger = Logger.getRootLogger();
        logger.setLevel(Level.DEBUG);

        EScidocBasicHandler connector = new EScidocBasicHandler(
                "escidoc.mpiwg-berlin.mpg.de", 8080, "itgroup", "XXX");

        String objectXPath = "//escidocItem:item";
        String query = "?maximumRecords=" + maxRecords + "&startRecord=" + start;
        String command = "/ir/context/escidoc:38279/resources/members";

        File file = new File("/tmp/ids.csv");
        BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), "UTF8"));
        try {
            // Loaded on first use and then shared by all items; an empty
            // result therefore never touches the lookup file.
            HashMap<String, List<List<String>>> newTerms = null;

            for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(
                    command + query, objectXPath)) {
                System.out.println(obj.getESciDocId());
                if (newTerms == null) {
                    newTerms = readEscidocToIdentifier();
                }
                findPersons(obj, out, newTerms, connector);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Reads a whole text file into a string using the platform default charset.
     *
     * @param filePath path of the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private String readFileAsString(String filePath) throws IOException {
        StringBuilder fileData = new StringBuilder();
        BufferedReader reader = new BufferedReader(new FileReader(filePath));
        try {
            char[] buf = new char[1024];
            int numRead;
            while ((numRead = reader.read(buf)) != -1) {
                fileData.append(buf, 0, numRead);
            }
        } finally {
            reader.close();
        }
        return fileData.toString();
    }

    /**
     * Command line entry point; see {@link #findAllPersons(String[])} for the arguments.
     */
    public static void main(String[] args) throws IllegalStateException, IOException,
            JDOMException, ESciDocXmlObjectException {
        AddConeIds adders = new AddConeIds();
        adders.findAllPersons(args);
    }
}