Mercurial > hg > eSciDocImport
view src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifierRelationModell.java @ 19:671c6e3449f7
add coneids added
author | dwinter |
---|---|
date | Mon, 27 May 2013 15:06:53 +0200 |
parents | e0efd3a9d2f0 |
children |
line wrap: on
line source
package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.SAXParser;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;

// NOTE(review): unused JDK-internal import; it may not resolve on newer JDKs - confirm and remove.
import sun.util.logging.resources.logging;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Extracts the identifier from the paths found in the source URLs of the Pubman entries and
 * looks up the matching entry in the metadata that was harvested from Foxridge into eSciDoc.
 *
 * The matching entries are then attached to an item as {@code hasMember} content relations.
 *
 * @author dwinter
 */
public class PubmanFoxridgeIdentifierRelationModell {

    /** Predicate used both for reading the existing relations and for the relations to add. */
    private static final String HAS_MEMBER_PREDICATE =
            "http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember";

    /** XPath selecting the {@code dc:identifier} elements of the publications in a search response. */
    private static final String IDENTIFIER_XPATH =
            "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item/metadataRecords:md-records/metadataRecords:md-record/publication:publication/dc:identifier";

    /** Captures the path segment that directly follows {@code /library/}; compiled once. */
    private static final Pattern LIBRARY_ID_PATTERN = Pattern.compile("/library/([^/]*)");

    Logger logger = Logger.getRootLogger();

    public PubmanFoxridgeIdentifierRelationModell(){
    }

    /**
     * Collects the identifiers of all members of a fixed Pubman context, searches the matching
     * items, and posts the ones that are not yet related to the target item as new
     * {@code hasMember} content relations. The request body is also written to
     * {@code /tmp/add.txt}.
     *
     * @param args optional; {@code args[0]} overrides the last-modification date of the target item
     * @throws IllegalStateException propagated from the handler and stream calls
     * @throws IOException if the request body cannot be written or the response cannot be read
     * @throws JDOMException propagated from the eSciDoc handler
     * @throws ESciDocXmlObjectException propagated from the eSciDoc handler
     */
    public static void main(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{
        String dateString="2011-06-08T11:48:50.287Z";
        String itemString="/ir/item/escidoc:162177";
        String escidocServer="escidoc-test.mpiwg-berlin.mpg.de";

        EScidocBasicHandler hd = new EScidocBasicHandler(escidocServer, 8080, "dwinter", "XXX");

        if (args.length>0){
            dateString=args[0];
        }

        if (!hd.isCurrent(itemString,dateString)){
            System.err.println("not the last version!");
            // NOTE(review): exits with status 0 although this is reported as an error - confirm intent.
            System.exit(0);
        }

        List<String> existingRelations = hd.getIdsOfRelationFromObject(itemString, HAS_MEMBER_PREDICATE);

        String contextMembers="http://escidoc.mpiwg-berlin.mpg.de:8080/ir/context/escidoc:55281/resources/members";

        BasicConfigurator.configure();
        Logger.getRootLogger().setLevel(Level.ERROR);

        PubmanFoxridgeIdentifierRelationModell pi = new PubmanFoxridgeIdentifierRelationModell();
        Set<String> identifiers = pi.getIdentifiersFromPubmanPath(contextMembers);
        if (identifiers == null){
            // The helper signals every failure with null; stop here instead of failing with a
            // NullPointerException in the loop below.
            System.err.println("could not read identifiers from: "+contextMembers);
            System.exit(1);
        }

        Set<String> ids = new HashSet<String>();
        for (String id: identifiers){
            // NOTE(review): id is inserted into the query without URL encoding - confirm the
            // identifiers never contain characters that need escaping.
            // The query may be narrowed to one context by appending e.g.
            // %20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:1001%22
            String command=String.format("/ir/items?maximumRecords=1&operation=searchRetrieve&version=1.1&query=%%22%%2Fmd-records%%2Fmd-record%%2Fadmin%%2Fidentifier%%22%%3D%%22%s%%22", id);
            System.out.println(command);

            List<eSciDocXmlObject> objects = hd.getObjectsFromFilterResult(command, "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item");
            System.out.println("found");

            for (eSciDocXmlObject obj:objects){
                System.out.println("adding:");
                String addObjId = obj.getESciDocId();
                if(existingRelations.contains(addObjId)){
                    System.out.println("already in relations:"+addObjId);
                } else {
                    ids.add(addObjId.replace("/ir/item/", ""));
                    System.out.println("adding:"+obj.getESciDocId());
                }
            }
        }

        StringBuilder xml = new StringBuilder();
        xml.append("<param last-modification-date=\"").append(dateString).append("\">");
        for(String id: ids){
            xml.append("<relation>")
               .append("<targetId>").append(id).append("</targetId>")
               .append("<predicate>").append(HAS_MEMBER_PREDICATE).append("</predicate>")
               .append(" </relation>");
        }
        xml.append("</param>");
        String addMemberXML = xml.toString();

        System.out.println("addmemberXML:"+addMemberXML);

        writeTextFile(new File("/tmp/add.txt"), addMemberXML);

        // Earlier variants posted to /ir/container/escidoc:161163/members/add and
        // /ir/container/escidoc:161164/members/add instead of the item's content relations.
        HttpResponse res = hd.eScidocPost(itemString+"/content-relations/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
        System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent()));
    }

    /**
     * Fetches the member list at the given URL and returns the identifiers found in the
     * {@code dc:identifier} elements, i.e. the path segment following {@code /library/}.
     * Every identifier is also written as one line to {@code /tmp/outadded.txt}.
     *
     * @param contextMembers URL of the members resource to read
     * @return the identifiers found, or {@code null} if fetching, parsing, or writing failed
     */
    private Set<String> getIdentifiersFromPubmanPath(String contextMembers) {
        Document doc = fetchDocument(contextMembers);
        if (doc == null){
            return null;
        }

        List<Element> nodes = selectIdentifierElements(doc);
        if (nodes == null){
            return null;
        }

        Set<String> retSet = new HashSet<String>();
        File outAdd = new File("/tmp/outadded.txt");
        FileWriter fw = null;
        try {
            fw = new FileWriter(outAdd);
            for (Element el: nodes){
                String text = el.getTextTrim();
                logger.debug("found:"+text);
                Matcher m = LIBRARY_ID_PATTERN.matcher(text);
                if (m.find()){
                    String id = m.group(1);
                    logger.debug("adding:"+id);
                    retSet.add(id);
                    fw.write(id+"\n");
                }
            }
            // Closed inside the try so that a failing flush is reported like a failing write.
            fw.close();
        } catch (IOException e) {
            // Reported as null like every other failure in this method; the caller decides
            // how to terminate.
            logger.error("could not write "+outAdd, e);
            return null;
        } finally {
            closeQuietly(fw);
        }
        return retSet;
    }

    /** Downloads and parses the XML document at {@code url}; returns {@code null} on any failure. */
    private Document fetchDocument(String url) {
        DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            HttpResponse response = httpclient.execute(new HttpGet(url));
            if (response.getStatusLine().getStatusCode()>200){
                logger.error(url);
                logger.error(response.getStatusLine().getReasonPhrase());
                return null;
            }
            HttpEntity ent = response.getEntity();
            if (ent == null){
                logger.error("empty response from: "+url);
                return null;
            }
            // The document is built completely before the connection is released below.
            return new SAXBuilder().build(ent.getContent());
        } catch (IllegalStateException e) {
            logger.error("could not read response from: "+url, e);
            return null;
        } catch (JDOMException e) {
            logger.error("could not parse response from: "+url, e);
            return null;
        } catch (IOException e) {
            // Also covers ClientProtocolException, which is an IOException.
            logger.error("could not fetch: "+url, e);
            return null;
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

    /** Selects the {@code dc:identifier} elements from {@code doc}; returns {@code null} on XPath errors. */
    @SuppressWarnings("unchecked") // the XPath only addresses elements; selectNodes returns a raw List
    private List<Element> selectIdentifierElements(Document doc) {
        try {
            XPath xpath = EScidocTools.getESciDocXpath(IDENTIFIER_XPATH);
            return xpath.selectNodes(doc);
        } catch (JDOMException e) {
            logger.error("could not evaluate xpath: "+IDENTIFIER_XPATH, e);
            return null;
        }
    }

    /** Writes {@code content} to {@code target}, closing the writer even if the write fails. */
    private static void writeTextFile(File target, String content) throws IOException {
        FileWriter fw = new FileWriter(target);
        try {
            fw.write(content);
        } finally {
            fw.close();
        }
    }

    /** Closes {@code fw} if it is open; a failure here is only logged. */
    private void closeQuietly(FileWriter fw) {
        if (fw == null){
            return;
        }
        try {
            fw.close();
        } catch (IOException e) {
            logger.error("could not close writer", e);
        }
    }
}