view src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java @ 21:a3f9a9ecdd42 default tip

UTF-8 problem bei AddConeIds solved. Neue Funktion zum Austauschen der Cone-IDs beim Editor
author dwinter
date Mon, 28 Oct 2013 14:53:43 +0100
parents d1f63ee9998d
children
line wrap: on
line source

package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;

// Walks through all entries and checks them for person identifiers.

// Where necessary the identifiers are added (call the Python tool "addconeids" for that).
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.xpath.XPath;


import com.sun.xml.internal.xsom.impl.scd.Iterators.Map;

import sun.security.krb5.internal.crypto.Nonce;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;




/**
 * Lists the persons of all items of an eSciDoc context together with their
 * identifiers in a CSV file and, where a person has no identifier yet, adds a
 * CoNE identifier taken from a lookup CSV file.
 */
public class AddConeIds {

	/**
	 * Lookup file with the new CoNE ids; one line per person in the form
	 * {@code itemId,familyName,givenName,...,coneId}.
	 * NOTE(review): presumably produced by the external Python tool mentioned
	 * in the file header -- confirm.
	 */
	private static final String FOUND_CSV = "/tmp/found.csv";

	/** Output file that receives one line per person/identifier pair. */
	private static final String IDS_CSV = "/tmp/ids.csv";

	/** Line format of the output file: item id, family name, given name, complete name, identifier. */
	private static final String CSV_ROW = "%s,%s,%s,%s,%s\n";

	/** Comment sent along with the submit and release requests. */
	private static final String CHANGE_COMMENT = "changed cone identifiers";

	/**
	 * Writes one CSV line per person identifier found in {@code obj} to
	 * {@code out}. A person without any identifier is written with an empty
	 * identifier column; if {@code newTerms} holds a matching entry for that
	 * person, the CoNE id is added to the item and the item is updated,
	 * submitted and released through {@code connector}.
	 *
	 * @param obj item whose persons are inspected
	 * @param out writer receiving the CSV lines; flushed after every person
	 * @param newTerms item id -> list of entries {@code [familyName, givenName, ..., coneId]};
	 *            may be {@code null}, in which case nothing is modified
	 * @param connector handler used to talk to the eSciDoc server
	 * @throws JDOMException if an XPath expression cannot be evaluated
	 * @throws IOException if writing to {@code out} or reading a response fails
	 * @throws IllegalStateException propagated from reading a response entity
	 * @throws ESciDocXmlObjectException propagated from the eSciDoc helper classes
	 */
	public void findPersons(eSciDocXmlObject obj,BufferedWriter out,HashMap<String,List<List<String>>>newTerms, EScidocBasicHandler connector) throws JDOMException, IOException, IllegalStateException, ESciDocXmlObjectException{

		Document doc = obj.getDocument();
		XPath personXPath = EScidocTools.getESciDocXpath("//person:person");
		@SuppressWarnings("unchecked")
		List<Element> persons = personXPath.selectNodes(doc);

		XPath familyNameXpath = EScidocTools.getESciDocXpath(".//eterms:family-name");
		XPath givenNameXpath = EScidocTools.getESciDocXpath(".//eterms:given-name");
		XPath completeNameXpath = EScidocTools.getESciDocXpath(".//eterms:complete-name");
		XPath identifierXpath = EScidocTools.getESciDocXpath("./dc:identifier");

		for (Element person : persons) {
			// Each name part is optional and checked on its own; the previous
			// code tested the family-name element before dereferencing the
			// given-name and complete-name elements.
			String famName = trimmedText(familyNameXpath, person);
			String givName = trimmedText(givenNameXpath, person);
			String complName = trimmedText(completeNameXpath, person);

			@SuppressWarnings("unchecked")
			List<Element> ids = identifierXpath.selectNodes(person);

			if (ids.isEmpty()) {
				// Build the line before the item is possibly replaced below.
				String outStr = String.format(CSV_ROW, obj.getESciDocId(), famName, givName, complName, "");
				if (newTerms != null) { // new CoNE entries exist
					obj = addConeIdentifiers(obj, person, famName, givName, newTerms, connector);
				}
				out.write(outStr);
			} else {
				for (Element id : ids) {
					out.write(String.format(CSV_ROW, obj.getESciDocId(), famName, givName, complName, id.getTextTrim()));
				}
			}

			out.flush();
		}
	}

	/**
	 * Returns the trimmed text of the first element selected by {@code xpath}
	 * relative to {@code context}, or the empty string if nothing is selected.
	 */
	private static String trimmedText(XPath xpath, Element context) throws JDOMException {
		Element node = (Element) xpath.selectSingleNode(context);
		return node == null ? "" : node.getTextTrim();
	}

	/**
	 * Adds a CoNE identifier to {@code person} for every entry of
	 * {@code newTerms} that belongs to the item and matches family and given
	 * name, then updates, submits, re-fetches and releases the item.
	 *
	 * @return the item to continue with: the re-fetched item if at least one
	 *         identifier was added and the item could be retrieved again,
	 *         otherwise the item passed in
	 */
	private eSciDocXmlObject addConeIdentifiers(eSciDocXmlObject obj, Element person, String famName, String givName,
			HashMap<String,List<List<String>>> newTerms, EScidocBasicHandler connector)
			throws JDOMException, IOException, IllegalStateException, ESciDocXmlObjectException {

		List<List<String>> entries = newTerms.get(obj.getESciDocId());
		if (entries == null) { // no additions for this item
			return obj;
		}

		for (List<String> entry : entries) {
			// An entry needs at least family name, given name and cone id.
			if (entry.size() < 3) {
				continue;
			}
			if (!entry.get(0).equals(famName) || !entry.get(1).equals(givName)) {
				continue;
			}

			// The cone id is the last column; the complete name in between may
			// itself contain commas and therefore span several columns.
			String coneId = entry.get(entry.size() - 1);

			Element newIdent = new Element("identifier", EScidocTools.DC);
			Namespace ns = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
			newIdent.setAttribute("type", "eterms:CONE", ns);
			newIdent.setText(coneId);
			person.addContent(newIdent);

			connector.updateItem(obj);
			HttpResponse retValue = connector.submitAnObject(obj, CHANGE_COMMENT);
			System.out.println(EScidocBasicHandler.convertStreamToString(retValue.getEntity().getContent()));

			// Fetch the item again before releasing it.
			HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
			HttpEntity ent = resObj.getEntity();
			if (ent == null) {
				System.out.println("Can not retrieve:" + obj.getESciDocId());
				continue;
			}

			// NOTE(review): from here on obj is the re-fetched item, while the
			// caller keeps iterating over person elements of the originally
			// loaded document. Identifiers added to further persons of the same
			// item are therefore not part of the object passed to updateItem --
			// confirm whether this is intended.
			obj = new eSciDocXmlObject(ent.getContent());

			retValue = connector.releaseAnObject(obj, CHANGE_COMMENT);
			System.out.println(EScidocBasicHandler.convertStreamToString(retValue.getEntity().getContent()));
		}
		return obj;
	}

	/**
	 * Reads the default lookup file {@code /tmp/found.csv}.
	 *
	 * @return item id -> list of entries {@code [familyName, givenName, ..., coneId]}
	 * @throws IOException if the file cannot be read
	 * @see #readEscidocToIdentifier(String)
	 */
	public HashMap<String,List<List<String>>> readEscidocToIdentifier() throws IOException{
		return readEscidocToIdentifier(FOUND_CSV);
	}

	/**
	 * Parses a lookup file with lines such as
	 * {@code /ir/item/escidoc:647775,McLaughlin,Peter,McLaughlin, Peter,http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource/240}.
	 * The first column becomes the map key, all remaining columns form one
	 * entry. Blank lines and lines with fewer than four columns are skipped;
	 * a trailing carriage return is not treated as part of the last column.
	 *
	 * @param csvPath path of the UTF-8 encoded lookup file
	 * @return item id -> list of entries {@code [familyName, givenName, ..., coneId]}
	 * @throws IOException if the file cannot be read
	 */
	public HashMap<String,List<List<String>>> readEscidocToIdentifier(String csvPath) throws IOException{

		HashMap<String,List<List<String>>> newTerms = new HashMap<String, List<List<String>>>();

		String[] lines = readFileAsString(csvPath).split("\r?\n");
		for (String line : lines) {
			if (line.trim().length() == 0) {
				continue;
			}

			String[] fields = line.split(",");
			// item id, family name, given name and cone id are the minimum
			if (fields.length < 4) {
				System.err.println("Skipping malformed line in " + csvPath + ": " + line);
				continue;
			}

			List<List<String>> content = newTerms.get(fields[0]);
			if (content == null) {
				content = new ArrayList<List<String>>();
				newTerms.put(fields[0], content);
			}

			List<String> entry = new ArrayList<String>();
			for (int j = 1; j < fields.length; j++) {
				entry.add(fields[j]);
			}
			content.add(entry);
		}

		return newTerms;
	}

	/**
	 * Fetches the members of the context {@code escidoc:38279}, writes their
	 * persons to {@code /tmp/ids.csv} and adds missing CoNE identifiers via
	 * {@link #findPersons}.
	 *
	 * @param args {@code args[0]} is the start record, {@code args[1]} the
	 *            maximum number of records; the JVM exits with -1 if fewer
	 *            than two arguments are given
	 * @throws IllegalStateException propagated from {@link #findPersons}
	 * @throws IOException if a file cannot be read or written
	 * @throws JDOMException propagated from {@link #findPersons}
	 * @throws ESciDocXmlObjectException propagated from the eSciDoc helper classes
	 */
	public void findAllPersons(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{
		// Validate the arguments first so that a wrong call neither truncates
		// the output file nor creates a connector.
		if(args.length<2){
			System.out.println("Usage: startrecord maximumrecords");
			System.exit(-1);
		}
		String start = args[0];
		String maxRecords = args[1];

		Logger logger = Logger.getRootLogger();
		logger.setLevel(Level.DEBUG);
		// NOTE(review): host and credentials are hard-coded; consider moving
		// them to configuration.
		EScidocBasicHandler connector = new EScidocBasicHandler(
				"escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "SSSS");

		String objectXPath = "//escidocItem:item";
		String query = "?maximumRecords=" + maxRecords + "&startRecord=" + start;
		String command = "/ir/context/escidoc:38279/resources/members";

		BufferedWriter out =
				new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(IDS_CSV)),"UTF8"));
		try {
			// The lookup file is loaded once, on the first item, instead of
			// being re-read for every item.
			HashMap<String, List<List<String>>> newTerms = null;
			for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(
					command + query, objectXPath)) {

				System.out.println(obj.getESciDocId());
				if (newTerms == null) {
					newTerms = readEscidocToIdentifier();
				}
				findPersons(obj,out,newTerms,connector);
			}
		} finally {
			// Close the output file even if processing an item fails.
			out.close();
		}
	}

	/**
	 * Reads a UTF-8 encoded file completely into a string.
	 *
	 * @param filePath path of the file to read
	 * @return the content of the file
	 * @throws IOException if the file cannot be opened or read
	 */
	private String readFileAsString(String filePath) throws IOException {
		StringBuilder fileData = new StringBuilder();
		BufferedReader reader = new BufferedReader(
				new InputStreamReader(
						new FileInputStream(filePath), "UTF8"));
		try {
			char[] buf = new char[1024];
			int numRead;
			while ((numRead = reader.read(buf)) != -1) {
				fileData.append(buf, 0, numRead);
			}
		} finally {
			// Release the file handle even if reading fails.
			reader.close();
		}
		return fileData.toString();
	}

	/**
	 * Command line entry point.
	 *
	 * @param args start record and maximum number of records
	 * @throws IllegalStateException see {@link #findAllPersons(String[])}
	 * @throws IOException see {@link #findAllPersons(String[])}
	 * @throws JDOMException see {@link #findAllPersons(String[])}
	 * @throws ESciDocXmlObjectException see {@link #findAllPersons(String[])}
	 */
	public static void main(String[] args) throws IllegalStateException,
			IOException, JDOMException, ESciDocXmlObjectException {

		AddConeIds adders = new AddConeIds();

		adders.findAllPersons(args);

	}

}