diff src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifierRelationModell.java @ 8:a844f6948dd8

Änderungen im Walker tools für pubman
author dwinter
date Mon, 14 May 2012 09:58:45 +0200
parents
children e0efd3a9d2f0
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifierRelationModell.java	Mon May 14 09:58:45 2012 +0200
@@ -0,0 +1,216 @@
+package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.URLEncoder;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.parsers.SAXParser;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.log4j.BasicConfigurator;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+import org.jdom.xpath.XPath;
+
+import sun.util.logging.resources.logging;
+
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
+import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
+import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
+
+/**
+ * Extracts the identifiers contained in the source URLs of Pubman entries and looks up the
+ * matching entries in the metadata harvested from Foxridge into eSciDoc.
+ *
+ * The items found this way are attached to a fixed eSciDoc item as <code>hasMember</code>
+ * content relations (see {@link #main(String[])}).
+ *
+ * @author dwinter
+ */
+public class PubmanFoxridgeIdentifierRelationModell {
+
+	/** Predicate URI of the content relation that {@link #main(String[])} reads and adds. */
+	private static final String HAS_MEMBER_PREDICATE = "http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember";
+
+	/** Captures the path segment that follows <code>/library/</code>; compiled once instead of per element. */
+	private static final Pattern LIBRARY_ID_PATTERN = Pattern.compile("/library/([^/]*)");
+
+	Logger logger = Logger.getRootLogger();
+
+	public PubmanFoxridgeIdentifierRelationModell(){
+
+	}
+
+	/**
+	 * Command line entry point.
+	 *
+	 * Reads the identifiers of all members of a fixed Pubman context, searches eSciDoc for an item
+	 * carrying each identifier, and posts every found item that is not yet related as a
+	 * <code>hasMember</code> content relation of a fixed target item. The posted XML is also
+	 * written to <code>/tmp/add.txt</code>.
+	 *
+	 * The program exits early when <code>isCurrent</code> returns false for the target item and the
+	 * date (presumably: the date is not the item's last modification date - confirm against
+	 * EScidocBasicHandler), and when the identifiers cannot be read from the context.
+	 *
+	 * @param args optional; <code>args[0]</code> replaces the built-in last-modification date
+	 * @throws IllegalStateException passed on from the eSciDoc handler / HTTP layer
+	 * @throws IOException if writing <code>/tmp/add.txt</code> or talking to the server fails
+	 * @throws JDOMException passed on from the eSciDoc handler
+	 * @throws ESciDocXmlObjectException passed on from the eSciDoc handler
+	 */
+	public static void main(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{
+		String dateString="2011-06-08T11:48:50.287Z";
+		String itemString="/ir/item/escidoc:162177";
+		String escidocServer="escidoc-test.mpiwg-berlin.mpg.de";
+		// NOTE(review): user name and password are hard-coded in source; they should be read from configuration.
+		EScidocBasicHandler hd = new EScidocBasicHandler(escidocServer, 8080, "dwinter", "fl0rian");
+
+		if (args.length>0){
+			dateString=args[0];
+		}
+
+		if (!hd.isCurrent(itemString,dateString)){
+			System.err.println("not the last version!");
+			// NOTE(review): exit status 0 is kept from the original although the run did not do its work.
+			System.exit(0);
+		}
+
+		List<String> existingRelations = hd.getIdsOfRelationFromObject(itemString, HAS_MEMBER_PREDICATE);
+
+		String contextMembers="http://escidoc.mpiwg-berlin.mpg.de:8080/ir/context/escidoc:55281/resources/members";
+
+		BasicConfigurator.configure();
+		Logger.getRootLogger().setLevel(Level.ERROR);
+
+		PubmanFoxridgeIdentifierRelationModell pi = new PubmanFoxridgeIdentifierRelationModell();
+
+		Set<String> identifiers = pi.getIdentifiersFromPubmanPath(contextMembers);
+		if (identifiers == null){
+			// The lookup signals failure with null; iterating over it would only produce a NullPointerException.
+			System.err.println("could not read identifiers from:"+contextMembers);
+			System.exit(1);
+		}
+
+		Set<String> ids = new HashSet<String>();
+		for (String id: identifiers){
+			String command=String.format("/ir/items?maximumRecords=1&operation=searchRetrieve&version=1.1&query=%%22%%2Fmd-records%%2Fmd-record%%2Fadmin%%2Fidentifier%%22%%3D%%22%s%%22", id);
+			// Optional restrictions of the search to a single context, currently disabled:
+			//command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:1001%22";
+			//command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:40001%22";
+			//command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:12001%22";
+			System.out.println(command);
+			List<eSciDocXmlObject> objects = hd.getObjectsFromFilterResult(command, "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item");
+			System.out.println("found");
+			for (eSciDocXmlObject obj:objects){
+				System.out.println("adding:");
+
+				String addObjId = obj.getESciDocId();
+				if(existingRelations.contains(addObjId)){
+					System.out.println("already in relations:"+addObjId);
+				} else {
+					// the relation target is the bare id, without the /ir/item/ path prefix
+					ids.add(addObjId.replace("/ir/item/", ""));
+					System.out.println("adding:"+obj.getESciDocId());
+				}
+			}
+		}
+
+		String addMemberXML = buildAddRelationsXml(dateString, ids);
+
+		System.out.println("addmemberXML:"+addMemberXML);
+		File addFile = new File("/tmp/add.txt");
+		FileWriter fw = new FileWriter(addFile);
+		try {
+			fw.write(addMemberXML);
+		} finally {
+			// close the file even when the write fails
+			fw.close();
+		}
+
+		// Alternative targets used earlier (adding the items as container members), currently disabled:
+		//HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161163/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
+		//HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161164/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
+		HttpResponse res = hd.eScidocPost(itemString+"/content-relations/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
+		System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent()));
+	}
+
+	/**
+	 * Builds the <code>param</code> document that is posted to add the content relations.
+	 *
+	 * @param dateString value of the <code>last-modification-date</code> attribute
+	 * @param ids target ids, one <code>relation</code> element is written per id
+	 * @return the XML as a string; the ids are inserted without XML escaping
+	 */
+	private static String buildAddRelationsXml(String dateString, Set<String> ids) {
+		StringBuilder xml = new StringBuilder();
+		xml.append("<param last-modification-date=\"").append(dateString).append("\">");
+		for (String id: ids){
+			xml.append("<relation>");
+			xml.append("<targetId>").append(id).append("</targetId>");
+			xml.append("<predicate>").append(HAS_MEMBER_PREDICATE).append("</predicate>");
+			xml.append(" </relation>");
+		}
+		xml.append("</param>");
+		return xml.toString();
+	}
+
+	/**
+	 * Fetches the given URL, selects the <code>dc:identifier</code> elements of the returned
+	 * records and collects the path segment following <code>/library/</code> from each of them.
+	 * The collected identifiers are also written to <code>/tmp/outadded.txt</code>, one per line.
+	 *
+	 * @param contextMembers URL that is requested with HTTP GET
+	 * @return the identifiers found, or <code>null</code> if the request, the parsing or the
+	 *         XPath evaluation failed (the cause is logged)
+	 */
+	private Set<String> getIdentifiersFromPubmanPath(String contextMembers) {
+		DefaultHttpClient httpclient = new DefaultHttpClient();
+		try {
+			Document doc = fetchDocument(httpclient, contextMembers);
+			if (doc == null){
+				return null;
+			}
+			List<Element> nodes = selectIdentifierElements(doc);
+			if (nodes == null){
+				return null;
+			}
+			return collectLibraryIdentifiers(nodes);
+		} finally {
+			// The document is fully parsed into memory by now; release the client's connections on every path.
+			httpclient.getConnectionManager().shutdown();
+		}
+	}
+
+	/** Requests the URL and parses the response body as XML; returns null (after logging) on any failure. */
+	private Document fetchDocument(DefaultHttpClient httpclient, String url) {
+		HttpResponse response;
+		try {
+			response = httpclient.execute(new HttpGet(url));
+		} catch (IOException e) {
+			// also covers ClientProtocolException, which is an IOException
+			logger.error("request failed:"+url, e);
+			return null;
+		}
+		if (response.getStatusLine().getStatusCode()>200){
+			logger.error(url);
+			logger.error(response.getStatusLine().getReasonPhrase());
+			return null;
+		}
+
+		HttpEntity ent = response.getEntity();
+		if (ent == null){
+			// a response without a body cannot be parsed
+			logger.error("response without content:"+url);
+			return null;
+		}
+
+		SAXBuilder builder = new SAXBuilder();
+		try {
+			return builder.build(ent.getContent());
+		} catch (IllegalStateException e) {
+			logger.error("could not read response:"+url, e);
+			return null;
+		} catch (JDOMException e) {
+			logger.error("could not parse response:"+url, e);
+			return null;
+		} catch (IOException e) {
+			logger.error("could not read response:"+url, e);
+			return null;
+		}
+	}
+
+	/** Evaluates the fixed dc:identifier XPath on the document; returns null (after logging) on failure. */
+	@SuppressWarnings("unchecked") // selectNodes returns a raw List; the path ends in dc:identifier, so elements are expected
+	private List<Element> selectIdentifierElements(Document doc) {
+		String xpathString="/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item/metadataRecords:md-records/metadataRecords:md-record/publication:publication/dc:identifier";
+		try {
+			XPath xpath = EScidocTools.getESciDocXpath(xpathString);
+			return xpath.selectNodes(doc);
+		} catch (JDOMException e) {
+			logger.error("could not evaluate xpath:"+xpathString, e);
+			return null;
+		}
+	}
+
+	/**
+	 * Collects the <code>/library/</code> identifiers from the trimmed text of the elements and
+	 * writes each one to <code>/tmp/outadded.txt</code>. Elements without a match are skipped.
+	 * Terminates the JVM if the file cannot be written.
+	 */
+	private Set<String> collectLibraryIdentifiers(List<Element> nodes) {
+		Set<String> retSet = new HashSet<String>();
+		File outAdd = new File("/tmp/outadded.txt");
+		try {
+			FileWriter fw = new FileWriter(outAdd);
+			try {
+				for (Element el: nodes){
+					String text = el.getTextTrim();
+					logger.debug("found:"+text);
+					Matcher m = LIBRARY_ID_PATTERN.matcher(text);
+					if (m.find()){
+						String id = m.group(1);
+						logger.debug("adding:"+id);
+						retSet.add(id);
+						fw.write(id+"\n");
+					}
+				}
+			} finally {
+				// close the file even when a write fails
+				fw.close();
+			}
+		} catch (IOException e) {
+			logger.error("could not write:"+outAdd, e);
+			// NOTE(review): exit status 0 is kept from the original although this is a failure.
+			System.exit(0);
+		}
+		return retSet;
+	}
+}