Mercurial > hg > eSciDocImport
diff src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifierRelationModell.java @ 8:a844f6948dd8
Änderungen im Walker
tools für pubman
author | dwinter |
---|---|
date | Mon, 14 May 2012 09:58:45 +0200 |
parents | |
children | e0efd3a9d2f0 |
line wrap: on
line diff
package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.SAXParser;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;

// NOTE(review): JDK-internal class that is not referenced anywhere in this file;
// kept only to leave the import list untouched - candidate for removal.
import sun.util.logging.resources.logging;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Extracts the identifiers contained in the source URLs ({@code dc:identifier})
 * of the Pubman entries of a context and looks up, for each identifier, the
 * matching item among the metadata harvested from Foxridge into eSciDoc.
 *
 * <p>The items found are then attached to a fixed eSciDoc item as
 * {@code hasMember} content relations (earlier revisions added them to a
 * container instead).
 *
 * @author dwinter
 */
public class PubmanFoxridgeIdentifierRelationModell {

    /** Predicate of the content relation that is read and written by {@link #main(String[])}. */
    private static final String HAS_MEMBER_PREDICATE =
            "http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember";

    /** SRU search for the single item whose admin identifier equals the {@code %s} argument. */
    private static final String SEARCH_COMMAND_FORMAT =
            "/ir/items?maximumRecords=1&operation=searchRetrieve&version=1.1&query=%%22%%2Fmd-records%%2Fmd-record%%2Fadmin%%2Fidentifier%%22%%3D%%22%s%%22";

    /** XPath selecting the item elements inside a search response. */
    private static final String SEARCH_RESULT_XPATH =
            "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item";

    /** XPath selecting the {@code dc:identifier} elements of all publications in a member list. */
    private static final String IDENTIFIER_XPATH =
            "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item/metadataRecords:md-records/metadataRecords:md-record/publication:publication/dc:identifier";

    /** The identifier is the path segment following {@code /library/}; compiled once. */
    private static final Pattern LIBRARY_ID_PATTERN = Pattern.compile("/library/([^/]*)");

    /** Debug dump of the request body sent by {@link #main(String[])}. */
    private static final String ADD_REQUEST_DUMP = "/tmp/add.txt";

    /** Debug dump of every identifier extracted from the Pubman entries. */
    private static final String IDENTIFIER_DUMP = "/tmp/outadded.txt";

    Logger logger = Logger.getRootLogger();

    public PubmanFoxridgeIdentifierRelationModell() {
    }

    /**
     * Collects the identifiers from the Pubman context, searches the matching
     * eSciDoc items and adds those that are not related yet as {@code hasMember}
     * content relations of the target item.
     *
     * <p>Exits with status 0 if the target item is not at the given modification
     * date, and with status 1 if the identifiers could not be read.
     *
     * @param args optional; {@code args[0]} overrides the last-modification date
     *             of the target item
     * @throws IllegalStateException declared for the eSciDoc handler calls
     * @throws IOException if the request dump cannot be written or the server
     *                     communication fails
     * @throws JDOMException declared for the eSciDoc handler calls
     * @throws ESciDocXmlObjectException declared for the eSciDoc handler calls
     */
    public static void main(String[] args)
            throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException {
        String dateString = "2011-06-08T11:48:50.287Z";
        String itemString = "/ir/item/escidoc:162177";
        String escidocServer = "escidoc-test.mpiwg-berlin.mpg.de";

        // NOTE(review): credentials are hard-coded in source control; they should
        // be supplied from outside (arguments, environment, configuration).
        EScidocBasicHandler hd = new EScidocBasicHandler(escidocServer, 8080, "dwinter", "fl0rian");

        if (args.length > 0) {
            dateString = args[0];
        }

        if (!hd.isCurrent(itemString, dateString)) {
            System.err.println("not the last version!");
            System.exit(0);
        }

        List<String> existingRelations = hd.getIdsOfRelationFromObject(itemString, HAS_MEMBER_PREDICATE);

        // NOTE(review): the members are read from a different host than the
        // handler above talks to (escidoc vs. escidoc-test) - confirm this is intended.
        String contextMembers =
                "http://escidoc.mpiwg-berlin.mpg.de:8080/ir/context/escidoc:55281/resources/members";

        BasicConfigurator.configure();
        Logger.getRootLogger().setLevel(Level.ERROR);

        PubmanFoxridgeIdentifierRelationModell pi = new PubmanFoxridgeIdentifierRelationModell();

        Set<String> identifiers = pi.getIdentifiersFromPubmanPath(contextMembers);
        if (identifiers == null) {
            // The helper has already logged the cause; iterating over null would
            // only end in a NullPointerException.
            System.err.println("could not read identifiers from " + contextMembers);
            System.exit(1);
            return;
        }

        Set<String> ids = new HashSet<String>();
        for (String id : identifiers) {
            // NOTE(review): id is inserted into the query as-is; it stems from a URL
            // path, so it may or may not already be percent-encoded - verify before
            // adding any encoding here.
            // The query may be narrowed to one context by appending e.g.
            // %20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:1001%22
            String command = String.format(SEARCH_COMMAND_FORMAT, id);
            System.out.println(command);
            List<eSciDocXmlObject> objects = hd.getObjectsFromFilterResult(command, SEARCH_RESULT_XPATH);
            System.out.println("found");
            for (eSciDocXmlObject obj : objects) {
                System.out.println("adding:");

                String addObjId = obj.getESciDocId();
                if (existingRelations.contains(addObjId)) {
                    System.out.println("already in relations:" + addObjId);
                } else {
                    ids.add(addObjId.replace("/ir/item/", ""));
                    System.out.println("adding:" + obj.getESciDocId());
                }
            }
        }

        StringBuilder addMemberXML = new StringBuilder();
        addMemberXML.append("<param last-modification-date=\"").append(dateString).append("\">");
        for (String id : ids) {
            addMemberXML.append("<relation>")
                    .append("<targetId>").append(id).append("</targetId>")
                    .append("<predicate>").append(HAS_MEMBER_PREDICATE).append("</predicate>")
                    .append(" </relation>");
        }
        addMemberXML.append("</param>");
        String requestBody = addMemberXML.toString();

        System.out.println("addmemberXML:" + requestBody);
        writeToFile(new File(ADD_REQUEST_DUMP), requestBody);

        HttpResponse res = hd.eScidocPost(itemString + "/content-relations/add",
                EScidocBasicHandler.convertStringToStream(requestBody));
        System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent()));
    }

    /**
     * Writes {@code content} to {@code target}, closing the writer even if the write fails.
     */
    private static void writeToFile(File target, String content) throws IOException {
        FileWriter fw = new FileWriter(target);
        try {
            fw.write(content);
        } finally {
            fw.close();
        }
    }

    /**
     * Fetches the member list of a context and extracts, from every
     * {@code dc:identifier} it contains, the path segment following
     * {@code /library/}.
     *
     * <p>Every extracted identifier is also appended, one per line, to
     * {@code /tmp/outadded.txt}.
     *
     * @param contextMembers URL of the member list to read
     * @return the distinct identifiers found, or {@code null} if the list could
     *         not be fetched, parsed, evaluated or the dump file could not be
     *         written (the cause is logged)
     */
    private Set<String> getIdentifiersFromPubmanPath(String contextMembers) {
        Document doc = fetchDocument(contextMembers);
        if (doc == null) {
            return null;
        }

        List<?> nodes;
        try {
            XPath xpath = EScidocTools.getESciDocXpath(IDENTIFIER_XPATH);
            nodes = xpath.selectNodes(doc);
        } catch (JDOMException e) {
            logger.error("cannot evaluate " + IDENTIFIER_XPATH, e);
            return null;
        }

        Set<String> retSet = new HashSet<String>();
        try {
            FileWriter fw = new FileWriter(new File(IDENTIFIER_DUMP));
            try {
                for (Object node : nodes) {
                    String text = ((Element) node).getTextTrim();
                    logger.debug("found:" + text);
                    Matcher m = LIBRARY_ID_PATTERN.matcher(text);
                    if (m.find()) {
                        String id = m.group(1);
                        logger.debug("adding:" + id);
                        retSet.add(id);
                        fw.write(id + "\n");
                    }
                }
            } finally {
                fw.close();
            }
        } catch (IOException e) {
            // Report the failure like every other error path of this method and
            // let the caller decide how to terminate.
            logger.error("cannot write " + IDENTIFIER_DUMP, e);
            return null;
        }
        return retSet;
    }

    /**
     * Performs a GET on {@code url} and parses the response body as XML.
     *
     * @return the parsed document, or {@code null} on any HTTP, I/O or parse
     *         failure (the cause is logged)
     */
    private Document fetchDocument(String url) {
        DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            HttpResponse response = httpclient.execute(new HttpGet(url));
            if (response.getStatusLine().getStatusCode() > 200) {
                logger.error(url);
                logger.error(response.getStatusLine().getReasonPhrase());
                return null;
            }

            HttpEntity ent = response.getEntity();
            if (ent == null) {
                logger.error("no response body from " + url);
                return null;
            }
            return new SAXBuilder().build(ent.getContent());
        } catch (IllegalStateException e) {
            logger.error("cannot read response from " + url, e);
            return null;
        } catch (JDOMException e) {
            logger.error("cannot parse response from " + url, e);
            return null;
        } catch (IOException e) {
            logger.error("cannot fetch " + url, e);
            return null;
        } finally {
            // Release the connection; the document is fully built at this point.
            httpclient.getConnectionManager().shutdown();
        }
    }
}