view src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifier.java @ 8:a844f6948dd8

Änderungen im Walker tools für pubman
author dwinter
date Mon, 14 May 2012 09:58:45 +0200
parents
children
line wrap: on
line source

package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.SAXParser;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;

import sun.util.logging.resources.logging;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Extracts identifiers from the source URLs stored in Pubman entries and looks up the
 * matching entries in the metadata that was harvested from Foxridge into eSciDoc.
 *
 * The items found this way are then added as members to a container.
 *
 * @author dwinter
 *
 */
public class PubmanFoxridgeIdentifier {

	/** Captures the path segment that directly follows "/library/" in a source URL. */
	private static final Pattern LIBRARY_ID_PATTERN = Pattern.compile("/library/([^/]*)");

	/** Selects the dc:identifier elements of all publication records in a search response. */
	private static final String IDENTIFIER_XPATH = "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item/metadataRecords:md-records/metadataRecords:md-record/publication:publication/dc:identifier";

	Logger logger = Logger.getRootLogger();
	public PubmanFoxridgeIdentifier(){
		
	}
	
	/**
	 * Reads the identifiers from the members of the Pubman context, searches the
	 * corresponding items on the eSciDoc server and adds every item found to a container.
	 * The response of the add-members request is printed to standard out.
	 *
	 * @param args not used
	 * @throws IllegalStateException declared for the eSciDoc handler calls
	 * @throws IOException if communication with the eSciDoc server fails
	 * @throws JDOMException declared for the eSciDoc handler calls
	 * @throws ESciDocXmlObjectException declared for the eSciDoc handler calls
	 */
	public static void main(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{
		String contextMembers="http://escidoc.mpiwg-berlin.mpg.de:8080/ir/context/escidoc:55281/resources/members";
		
		String escidocServer="escidoc-test.mpiwg-berlin.mpg.de";
		
		BasicConfigurator.configure();
		Logger.getRootLogger().setLevel(Level.ERROR);
		
		PubmanFoxridgeIdentifier pi = new PubmanFoxridgeIdentifier();
		
		Set<String> identifiers = pi.getIdentifiersFromPubmanPath(contextMembers);
		if (identifiers == null){
			// The lookup already logged the cause; without identifiers there is nothing to do.
			pi.logger.error("no identifiers could be read from: "+contextMembers);
			return;
		}
		// NOTE(review): credentials are hard-coded in the source; they should be moved to
		// external configuration and the password rotated.
		EScidocBasicHandler hd = new EScidocBasicHandler(escidocServer, 8080, "dwinter", "weikiki7");
		Set <String> ids = new HashSet<String>();
		for (String id: identifiers){
			// NOTE(review): id is inserted into the query without URL-encoding; confirm that the
			// identifiers never contain characters that need escaping.
			String command=String.format("/ir/items?maximumRecords=1&operation=searchRetrieve&version=1.1&query=%%22%%2Fmd-records%%2Fmd-record%%2Fadmin%%2Fidentifier%%22%%3D%%22%s%%22", id);
			// Restrict the search to one context. Context ids used in earlier versions of this
			// line: escidoc:1001 and escidoc:40001.
			command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:12001%22";
			System.out.println(command);
			List<eSciDocXmlObject> objects = hd.getObjectsFromFilterResult(command, "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item");
			System.out.println("found");
			for (eSciDocXmlObject obj:objects){
				System.out.println("adding:");
				// Only the bare object id is sent, without the "/ir/item/" prefix.
				ids.add(obj.getESciDocId().replace("/ir/item/", ""));
				System.out.println("adding:"+obj.getESciDocId());
			}
		}
		// NOTE(review): the last-modification-date is hard-coded; presumably it has to match
		// the current modification date of the container - confirm against the eSciDoc API.
		StringBuilder addMemberXML = new StringBuilder("<param last-modification-date=\"2011-06-07T19:30:28.517Z\">");
		for(String id: ids){
			addMemberXML.append("<id>").append(id).append("</id>");
		}
		addMemberXML.append("</param>");
		
		// Target container. Container ids used in earlier versions of this line:
		// escidoc:161163 and escidoc:161164.
		HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161165/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML.toString()));
		System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent()));
	}

	/**
	 * Fetches the given URL, parses the response as XML and collects the library
	 * identifiers from all dc:identifier elements of the publication records.
	 *
	 * @param contextMembers URL that returns the members of a context as a search response
	 * @return the set of identifiers found (possibly empty), or <code>null</code> if the
	 *         request failed, returned a status above 200 or could not be parsed
	 */
	private Set<String> getIdentifiersFromPubmanPath(String contextMembers) {
		HttpGet get = new HttpGet(contextMembers);
		DefaultHttpClient httpclient = new DefaultHttpClient();
		try {
			HttpResponse response = httpclient.execute(get);
			if (response.getStatusLine().getStatusCode()>200){
				logger.error(contextMembers);
				logger.error(response.getStatusLine().getReasonPhrase());
				return null;
			}
			
			HttpEntity ent = response.getEntity();
			if (ent == null){
				logger.error("response without content from: "+contextMembers);
				return null;
			}
			
			Document doc = new SAXBuilder().build(ent.getContent());
			XPath xpath = EScidocTools.getESciDocXpath(IDENTIFIER_XPATH);
			return extractLibraryIds(xpath.selectNodes(doc));
		} catch (IOException e) {
			// also covers ClientProtocolException, which is a subclass of IOException
			logger.error("cannot read: "+contextMembers, e);
			return null;
		} catch (JDOMException e) {
			logger.error("cannot parse or evaluate the response of: "+contextMembers, e);
			return null;
		} catch (IllegalStateException e) {
			logger.error("cannot access the content of: "+contextMembers, e);
			return null;
		} finally {
			// The client is used for this single request only; shutting the connection manager
			// down releases the connection on every path, including the early returns.
			httpclient.getConnectionManager().shutdown();
		}
	}

	/**
	 * Collects the identifier that follows "/library/" from the text of every element.
	 *
	 * @param nodes result of the XPath selection; entries that are not elements are ignored
	 * @return the identifiers found, without duplicates and without empty values
	 */
	private Set<String> extractLibraryIds(List<?> nodes) {
		Set<String> retSet = new HashSet<String>();
		for (Object node: nodes){
			if (!(node instanceof Element)){
				continue;
			}
			String text = ((Element) node).getTextTrim();
			logger.debug("found:"+text);
			Matcher m = LIBRARY_ID_PATTERN.matcher(text);
			if (!m.find()){
				continue;
			}
			String id = m.group(1);
			if (id.length()==0){
				// A URL that ends in "/library/" carries no identifier to search for.
				continue;
			}
			logger.debug("adding:"+id);
			retSet.add(id);
		}
		return retSet;
	}
}