# HG changeset patch # User dwinter # Date 1336982325 -7200 # Node ID a844f6948dd8bd23e6e6e7b988b65b28d5f141d1 # Parent df8c62d84f8f3ccad99deff5ba5d23afb7800769 Änderungen im Walker tools für pubman diff -r df8c62d84f8f -r a844f6948dd8 .classpath --- a/.classpath Sat Jan 15 17:05:29 2011 +0100 +++ b/.classpath Mon May 14 09:58:45 2012 +0200 @@ -1,7 +1,7 @@ - + diff -r df8c62d84f8f -r a844f6948dd8 .settings/org.eclipse.core.resources.prefs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/org.eclipse.core.resources.prefs Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,4 @@ +#Wed Jun 08 08:31:49 CEST 2011 +eclipse.preferences.version=1 +encoding//src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifier.java=UTF-8 +encoding//src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifierRelationModell.java=UTF-8 diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/AddMPIWGIdentifiers.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/AddMPIWGIdentifiers.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,179 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Attribute; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.input.SAXBuilder; +import org.jdom.xpath.XPath; + +import sun.util.logging.resources.logging; + +import com.sun.org.apache.xerces.internal.parsers.SAXParser; + +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class AddMPIWGIdentifiers { + + /** + * Erzeuge MPIWG identifier in den administrativen MPIWG Metadaten. 
+ * Solange der Identifier nicht in den Metadaten (index.meta) steht, wird dazu der Filename genommen. + * @param args + * @throws ESciDocXmlObjectException + * @throws JDOMException + * @throws IOException + * @throws ClientProtocolException + * @throws IllegalStateException + */ + public static void main(String[] args) throws IllegalStateException, ClientProtocolException, IOException, JDOMException, ESciDocXmlObjectException { + // TODO Auto-generated method stub + BasicConfigurator.configure(); + Logger logger = Logger.getRootLogger(); + logger.setLevel(Level.INFO); + EScidocBasicHandler connector = new EScidocBasicHandler( + "escidoc-test.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian"); + + if(args.length<2){ + System.out.println("Usage: startrecord maximumrecords"); + System.exit(-1); + } + String MAX_REC = args[1]; + String start = args[0]; + String objectXPath = "//escidocItem:item"; + + String query = "?maximumRecords=" + String.valueOf(MAX_REC) + + "&startRecord=" + String.valueOf(start); + String command = "/ir/context/escidoc:12001/resources/members"; + for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( + command + query, objectXPath)) { + + Document doc = obj.getDocument(); + Boolean changed=false; + XPath archivePathXP = EScidocTools.getESciDocXpath("//mpiwg:admin/mpiwg:archivePath"); + XPath mpiwgMDXP= EScidocTools.getESciDocXpath("//mpiwg:admin"); + XPath mpiwgIdentifierXP= EScidocTools.getESciDocXpath("//mpiwg:admin/mpiwg:identifier"); + + + Element archivePathNode = (Element) archivePathXP.selectSingleNode(doc); + if(archivePathNode==null) // kein Treffer + continue; + + Element mpiwgIDNode = (Element) mpiwgIdentifierXP.selectSingleNode(doc); + if(mpiwgIDNode!=null){ // kein Treffer + System.out.println("ID already exists:"+mpiwgIDNode.getTextTrim()); + continue; + } + + + + + String archivePath=archivePathNode.getTextTrim(); + String[] pathElements=archivePath.split("/"); + int length=pathElements.length; + String 
mpiwgID=pathElements[length-1]; + Element mpiwgMDNode= (Element) mpiwgMDXP.selectSingleNode(doc); + + if(mpiwgID.equals("")) + mpiwgID=handleEmptyID(doc,mpiwgMDNode); + + + + + mpiwgIDNode= new Element("identifier", EScidocTools.MPIWG); + mpiwgIDNode.setText(mpiwgID); + mpiwgMDNode.addContent(mpiwgIDNode); + + //correct release number, some objecte don't have an release number, why?? + + + Element latestRelease = (Element)EScidocBasicHandler.getXPath(doc.getRootElement(),"//escidocItem:properties/prop:latest-release/release:number",true); + if(latestRelease==null){ + logger.info("not released yet:"+obj.getESciDocId()); + } else { + String rn= latestRelease.getTextTrim(); + if (rn.equals("")){ + String latestVersionNumber = ((Element)EScidocBasicHandler.getXPath(doc.getRootElement(),"//escidocItem:properties/prop:version/version:number",true)).getTextTrim(); + logger.info("have to add relase number!"); + logger.info("Will add:"+latestVersionNumber); + + latestRelease.setText(latestVersionNumber); + } + } + System.out.println("Adding:"+mpiwgID); + changed=true; + + + //System.out.println(obj.printXML()); + if (changed){ + + Boolean retVal = connector.updateItem(obj); + System.out.println("Replaced:"+obj.getESciDocId()); + HttpResponse retValu = connector.submitAnObject(obj, "adding identifier"); + + System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + System.out.println("Can not retrieve:" + obj.getESciDocId()); + continue; + } + + HttpResponse reValue2 = connector.releaseAnObject(obj, "repairing publishing info"); + System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent())); + + } + } + + } + + private static String handleEmptyID(Document doc, Element mpiwgMDNode) throws JDOMException, IOException { + 
XPath xlinkXP = EScidocTools.getESciDocXpath("//escidocComponents:component[escidocComponents:properties/prop:content-category/text()='index_meta']/@xlink:href"); + + SAXBuilder sb = new SAXBuilder(); + Object node = xlinkXP.selectSingleNode(doc); + Attribute attr =(Attribute) node; + String md = attr.getValue(); + + Document doc2 = sb.build("http://escidoc-test.mpiwg-berlin.mpg.de:8080"+md+"/content"); + XPath apXP= EScidocTools.getESciDocXpath("//resource/archive-path"); + XPath imXP= EScidocTools.getESciDocXpath("//texttool/image"); + + Element apNode = (Element)apXP.selectSingleNode(doc2); + String ap=""; + if(apNode!=null) + { + ap = apNode.getTextTrim(); + XPath apNewXP= EScidocTools.getESciDocXpath(".//mpiwg:archivePath"); + Element apNewNode = (Element) apNewXP.selectSingleNode(mpiwgMDNode); + apNewNode.setText(ap); + } + Element imNode = (Element)imXP.selectSingleNode(doc2); + + if(imNode!=null) + { + String im = imNode.getTextTrim(); + XPath imNewXP= EScidocTools.getESciDocXpath(".//mpiwg:imageFolder"); + Element imNewNode = (Element) imNewXP.selectSingleNode(mpiwgMDNode); + imNewNode.setText(ap+"/"+im); + } + + String[] pathElements=ap.split("/"); + int length=pathElements.length; + return pathElements[length-1]; + + + } +} diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java --- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Mon May 14 09:58:45 2012 +0200 @@ -24,6 +24,7 @@ import org.apache.commons.codec.EncoderException; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; +import org.apache.http.StatusLine; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpDelete; @@ -58,8 +59,6 @@ - - /** * Handler to create a connection with an eScidoc server * @author dwinter @@ -204,6 +203,8 @@ } + + /** 
Send a delete command to the escidoc client. performs a login if necessara * @param command * @return @@ -325,7 +326,7 @@ } /** - * Find the content-modell (item) + * Find the content-model (item) * @param ret * @return * @throws IOException @@ -411,8 +412,8 @@ return node.getText(); } - public static Object getXPath(Element node, String string,boolean single) throws JDOMException { - XPath xpath= XPath.newInstance(string); + public static Object getXPath(Element node, String xpathString,boolean single) throws JDOMException { + XPath xpath= XPath.newInstance(xpathString); xpath.addNamespace("dc",EScidocTools.DC); xpath.addNamespace("escidocComponents",EScidocTools.escidocComponents); xpath.addNamespace("prop",EScidocTools.prop); @@ -435,6 +436,33 @@ + /** Updates an item at the eScidocserver + * @param escidocItem + * @return + * @throws JDOMException + */ + public Boolean updateItem(eSciDocXmlObject escidocItem) throws JDOMException { + String cmd=escidocItem.getESciDocId(); + + try { + String retStr = escidocItem.printXML(); + logger.debug(retStr); + String newObj = createObject(cmd, retStr); + return escidocItem.upDateFromXML(newObj); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return false; + } + + + } + + /** + * Creates an item + * @param escidocItem + * @return + */ public Boolean createItem(eSciDocXmlObject escidocItem) { String cmd="/ir/item"; @@ -451,6 +479,13 @@ } + /** + * Creates a new object, i.e. does a httPut on command + * @param command should accept an httPut for creating new object, e.g. 
/ir/irtem + * @param xml eScidocXML describing the new object + * @return + * @throws Exception + */ public String createObject (String command,String xml) throws Exception { @@ -486,6 +521,13 @@ + /** + * Searches in a given context for an object's pid and return its escidoc:id + * @param pid + * @param context + * @return + * @throws ConnectorException + */ public String getIDfromPID(String pid, String context) throws ConnectorException { @@ -538,6 +580,15 @@ + /** + * submits the object + * @param obj + * @param comment + * @return + * @throws ClientProtocolException + * @throws IOException + * @throws JDOMException + */ public HttpResponse submitAnObject(eSciDocXmlObject obj, String comment) throws ClientProtocolException, IOException, JDOMException { try { //addVersionPid(obj); @@ -559,6 +610,17 @@ } + /** + * Generates a list of eScidocObject from a search or filter + * @param command search or filter command (e.g. /resources/members/filter) + * @param objectXpath xpath to the element which separates the objects in the resultXML of the filering or search, e.g. + * "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item" in a search + * @return + * @throws IOException + * @throws IllegalStateException + * @throws JDOMException + * @throws ESciDocXmlObjectException + */ public List getObjectsFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException,ESciDocXmlObjectException { //String filter = ""; // @@ -585,6 +647,16 @@ return ret; } + /** + * @param command search or filter command (e.g. /resources/members/filter) + * @param objectXpath xpath to the element which separates the objects in the resultXML of the filering or search, e.g. 
+ * "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item" in a search + * @param mode mode=0 or 2 for pending, mode=1 for submitted objects + * @return + * @throws IOException + * @throws IllegalStateException + * @throws JDOMException + */ public Integer getNumberOfHitsFromFilterResult(String command, String objectXPath, int mode) throws IOException, IllegalStateException, JDOMException { String query=""; @@ -607,6 +679,13 @@ return tmpInt; } + /** adds a PID generated by the MPIWG PID generator to the object. + * @param obj + * @return + * @throws ClientProtocolException + * @throws IOException + * @throws JDOMException + */ public boolean addVersionPid(eSciDocXmlObject obj) throws ClientProtocolException, IOException, JDOMException{ //HttpResponse ret = eScidocGet(href); //String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); @@ -639,6 +718,13 @@ + /** + * @param obj + * @param comment + * @return + * @throws IOException + * @throws JDOMException + */ public HttpResponse releaseAnObject(eSciDocXmlObject obj, String comment) throws IOException, JDOMException { //HttpResponse ret = getEsciDocHandler().eScidocGet(href); //String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); @@ -676,20 +762,28 @@ - public ECHOObject alreadyExists(String indexField, String testString, String context) throws ConnectorException, ObjectNotUniqueError { + /**check if object already exists + * @param indexField field in the /ir/items object to identify the object, for a list see /ir/items?operation=explain + * @param testString test if this exist in field. + * @param context + * @return + * @throws ConnectorException + * @throws ObjectNotUniqueError + */ + public ECHOObject alreadyExists(String indexField, String testString, String context, String comparator) throws ConnectorException, ObjectNotUniqueError { String[] ct = context.split("/"); // gebraucht wird hier nur die id, dh ohne /ir/... 
String contextId=ct[ct.length-1]; - String searchString = String.format("\"%s\"=\"%s\"",indexField,testString); - searchString += " and "+String.format("\"%s\"=\"%s\"","/properties/context/id",contextId); + String searchString = String.format("\"%s\"%s\"%s\"",indexField,comparator,testString); + searchString += " and "+String.format("\"%s\"=\"%s\"","context.objid",contextId); HttpResponse ret; try{ searchString = URLEncoder.encode(searchString,"utf-8"); - ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString); + ret = eScidocGet("/srw/search/escidoc_all?operation=searchRetrieve&version=1.1&query="+searchString); } catch (UnsupportedEncodingException e) { throw new ConnectorException(); } catch (IOException e) { @@ -724,6 +818,12 @@ if (hits>1) throw new ObjectNotUniqueError(); return getOldObjectFromESciDoc(doc); + //XPath xpItem = EScidocTools.getESciDocXpath("//escidocItem:item"); + + + //Element attr = (Element) xp.selectSingleNode(doc); + + //return getOldObjectFromESciDoc(attr); } return null; } catch (IOException e) { @@ -742,14 +842,20 @@ + /**Nimmt ein u.U. modifiziertes eScidocObject (als jDom.Document) und sucht das dazugehörige abgespeicherte Objekt. 
+ * @param doc + * @return + */ private ECHOObject getOldObjectFromESciDoc(Document doc) { MapretMap = new HashMap(); XPath md5Nodes; XPath itemId; XPath lastModificationDate; + XPath objId; try { md5Nodes= EScidocTools.getESciDocXpath(".//escidocComponents:component/escidocComponents:properties[prop:content-category[text()='index_meta']]/prop:checksum"); itemId= EScidocTools.getESciDocXpath(".//escidocItem:item/@xlink:href"); + objId= EScidocTools.getESciDocXpath(".//escidocItem:item/@objid"); lastModificationDate = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date"); } catch (JDOMException e) { // TODO Auto-generated catch block @@ -762,6 +868,9 @@ try { node = (Element)md5Nodes.selectSingleNode(doc); idNode = (Attribute)itemId.selectSingleNode(doc); + if (idNode==null){ + idNode = (Attribute)objId.selectSingleNode(doc); + } lastModificationDateNode =(Attribute)lastModificationDate.selectSingleNode(doc); } catch (JDOMException e) { @@ -787,14 +896,26 @@ - public ArrayList getAllLinksOfContext(String string, String context) throws IOException, IllegalStateException, JDOMException { + /** + * Suche alle Links aus den Komponenten des Types type, d.h. es wird auf dem Context in allen Elementen nach: + * ".//escidocComponents:component[escidocComponents:properties/prop:content-category[text()='"+type+"']]/escidocComponents:content/@xlink:href" + * gesucht. 
+ * + * @param type + * @param context + * @return + * @throws IOException + * @throws IllegalStateException + * @throws JDOMException + */ + public ArrayList getAllLinksOfContext(String type, String context) throws IOException, IllegalStateException, JDOMException { HttpResponse result = eScidocGet(context+"/resources/members"); Document doc = new SAXBuilder().build(result.getEntity().getContent()); XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item"); XPath id = EScidocTools.getESciDocXpath("./@xlink:href"); - XPath url= EScidocTools.getESciDocXpath(".//escidocComponents:component[escidocComponents:properties/prop:content-category[text()='"+string+"']]/escidocComponents:content/@xlink:href"); + XPath url= EScidocTools.getESciDocXpath(".//escidocComponents:component[escidocComponents:properties/prop:content-category[text()='"+type+"']]/escidocComponents:content/@xlink:href"); ArrayList ret = new ArrayList(); List items = xp.selectNodes(doc); @@ -819,6 +940,13 @@ + /** Suche alle Objekte mit field = value, die Felder ergeben sich aus /srw/search/escidoc_all?operation= explain + * + * @param field + * @param value + * @return + * @throws ESciDocXmlObjectException + */ public List getObjectsFromSearch(String field, String value) throws ESciDocXmlObjectException { String query = "/srw/search/escidoc_all?operation=searchRetrieve&version=1.1&query="; query+=field+"%3d"; @@ -848,6 +976,15 @@ + /** + * Suche alle Objekte mit field = value, die Felder ergeben sich aus /srw/search/escidoc_all?operation= explain + * + * @param field + * @param value + * @param context + * @return + * @throws ESciDocXmlObjectException + */ public List getObjectsFromSearch(String field, String value, String context) throws ESciDocXmlObjectException { String query = "/srw/search/escidoc_all?operation=searchRetrieve&version=1.1&query="; @@ -883,6 +1020,75 @@ + /** + * Teste ob das Item (itemString) mit Datestamp dateString versehen ist. 
+ * @param itemString + * @param dateString + * @return + * @throws IOException + */ + public boolean isCurrent(String itemString, String dateString) throws IOException { + HttpResponse ret = eScidocGet(itemString); + if (ret.getStatusLine().getStatusCode()!=200){ + return false; + } + String dateStamp = getDateStamp(EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent())); + if(dateStamp.equals(dateString)) + return true; + return false; + } + + + + /** holt alle ids von Objekten, die mit item über das Predicate über Relationen verbunden sind (item hat das Format /ir/item/XXX) + * @param itemString + * @param predicate + * @return + */ + public List getIdsOfRelationFromObject(String item, + String predicate) { + String cmd=item+"/relations"; + ArrayList ret = new ArrayList(); + + HttpResponse result; + try { + result = eScidocGet(cmd); + } catch (IOException e) { + logger.error("Cannot deal with:"+cmd); + return ret; + } + + InputStream resultString; + try { + resultString = result.getEntity().getContent();; + } catch (IllegalStateException e) { + logger.error("Cannot deal with:"+cmd); + return ret; + } catch (IOException e) { + logger.error("Cannot deal with:"+cmd); + return ret; + } + + try { + Document doc = (new SAXBuilder()).build(resultString); + List xp = (List) getXPath(doc.getRootElement(), "//relations:relation[@predicate='"+predicate+"']/@xlink:href", false); + + for (Attribute attr: xp){ + ret.add(attr.getValue()); + } + return ret; + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return ret; + } + + + diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java --- a/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Mon May 14 09:58:45 2012 +0200 @@ -34,6 +34,13 @@ public static 
final String zs="http://www.loc.gov/zing/srw/"; public static final String srel="http://escidoc.de/core/01/structural-relations/"; public static final String structMap="http://www.escidoc.de/schemas/structmap/0.4"; + public static final String eterms="http://purl.org/escidoc/metadata/terms/0.1/"; + public static final String person="http://purl.org/escidoc/metadata/profiles/0.1/person"; + public static final String organization="http://purl.org/escidoc/metadata/profiles/0.1/organization"; + public static final String source="http://purl.org/escidoc/metadata/profiles/0.1/source"; + public static final String publication="http://purl.org/escidoc/metadata/profiles/0.1/publication"; + public static final String dcterms="http://purl.org/dc/terms/"; + public EScidocTools(EScidocBasicHandler connector) { // TODO Auto-generated constructor stub @@ -47,6 +54,7 @@ XPath xpathResources = XPath.newInstance(xpath); xpathResources.addNamespace("prop",prop); xpathResources.addNamespace("mpiwg",MPIWG); + xpathResources.addNamespace("dcterms",dcterms); xpathResources.addNamespace("rdf",RDF); xpathResources.addNamespace("echonavigation",ECHO); @@ -73,6 +81,11 @@ xpathResources.addNamespace("zs",zs); xpathResources.addNamespace("srel",srel); xpathResources.addNamespace("struct-map",structMap); + xpathResources.addNamespace("eterms",eterms); + xpathResources.addNamespace("person",person); + xpathResources.addNamespace("organization",organization); + xpathResources.addNamespace("publication",publication); + xpathResources.addNamespace("source",source); return xpathResources; } diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifier.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifier.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,169 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.net.URLEncoder; +import java.util.HashSet; 
+import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.SAXParser; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.input.SAXBuilder; +import org.jdom.xpath.XPath; + +import sun.util.logging.resources.logging; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +/** + * Diese Klasse sucht aus den Pfaden im Pubman Eintrag der URL zu den Quellen den Identifier heraus und findet + * den zugehörigen Eintrag in den von der Foxridge in eScidoc geharvesten Metadaten. + * + * Daraus wird dann ein Container erstellt. 
+ * @author dwinter + * + */ +public class PubmanFoxridgeIdentifier { + + Logger logger = Logger.getRootLogger(); + public PubmanFoxridgeIdentifier(){ + + } + + public static void main(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{ + String contextMembers="http://escidoc.mpiwg-berlin.mpg.de:8080/ir/context/escidoc:55281/resources/members"; + + String escidocServer="escidoc-test.mpiwg-berlin.mpg.de"; + + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.ERROR); + + PubmanFoxridgeIdentifier pi = new PubmanFoxridgeIdentifier(); + + Set identifiers = pi.getIdentifiersFromPubmanPath(contextMembers); + EScidocBasicHandler hd = new EScidocBasicHandler(escidocServer, 8080, "dwinter", "weikiki7"); + Set ids = new HashSet(); + int counter=0; + for (String id: identifiers){ + + + String command=String.format("/ir/items?maximumRecords=1&operation=searchRetrieve&version=1.1&query=%%22%%2Fmd-records%%2Fmd-record%%2Fadmin%%2Fidentifier%%22%%3D%%22%s%%22", id); + //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:1001%22"; + //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:40001%22"; + command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:12001%22"; + System.out.println(command); + List objects = hd.getObjectsFromFilterResult(command, "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item"); + System.out.println("found"); + for (eSciDocXmlObject obj:objects){ + System.out.println("adding:"); + ids.add(obj.getESciDocId().replace("/ir/item/", "")); + counter+=1; + System.out.println("adding:"+obj.getESciDocId()); + } + //if (counter>10) + // break; + + } + String addMemberXML=""; + for(String id: ids){ + addMemberXML+=""+id+""; + } + addMemberXML+=""; + + //HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161163/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML)); + //HttpResponse res = 
hd.eScidocPost("/ir/container/escidoc:161164/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML)); + HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161165/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML)); + System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent())); + } + + private Set getIdentifiersFromPubmanPath(String contextMembers) { + HttpGet get = new HttpGet(contextMembers); + DefaultHttpClient httpclient = new DefaultHttpClient(); + Set retSet = new HashSet(); + HttpResponse response; + try { + response = httpclient.execute(get); + } catch (ClientProtocolException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + return null; + } catch (IOException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + return null; + } + if (response.getStatusLine().getStatusCode()>200){ + logger.error(contextMembers); + logger.error(response.getStatusLine().getReasonPhrase()); + return null; + } + + HttpEntity ent = response.getEntity(); + + SAXBuilder builder = new SAXBuilder(); + Document doc; + try { + doc = builder.build(ent.getContent()); + } catch (IllegalStateException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } + XPath xpath=null; + try { + String xpathString="/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item/metadataRecords:md-records/metadataRecords:md-record/publication:publication/dc:identifier"; + xpath = EScidocTools.getESciDocXpath(xpathString); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } + List nodes; + try { + nodes= xpath.selectNodes(doc); + } catch (JDOMException e) { + // TODO Auto-generated catch block 
+ e.printStackTrace(); + return null; + } + + for (Element el: nodes){ + String text = el.getTextTrim(); + Pattern x = Pattern.compile("/library/([^/]*)"); + logger.debug("found:"+text); + Matcher m = x.matcher(text); + boolean matched = m.find(); + if (matched){ + String id = m.group(1); + logger.debug("adding:"+id); + retSet.add(id); + } + + } + return retSet; + } +} diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifierRelationModell.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifierRelationModell.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,216 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.net.URLEncoder; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.SAXParser; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.input.SAXBuilder; +import org.jdom.xpath.XPath; + +import sun.util.logging.resources.logging; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +/** + * Diese Klasse sucht aus den Pfaden im Pubman Eintrag der URL zu den Quellen den Identifier heraus und findet + * den zugehörigen Eintrag in den von der Foxridge in eScidoc geharvesten 
Metadaten. + * + * Daraus wird dann ein Container erstellt. + * @author dwinter + * + */ +public class PubmanFoxridgeIdentifierRelationModell { + + Logger logger = Logger.getRootLogger(); + public PubmanFoxridgeIdentifierRelationModell(){ + + } + + public static void main(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{ + String dateString="2011-06-08T11:48:50.287Z"; + + String itemString="/ir/item/escidoc:162177"; + + String escidocServer="escidoc-test.mpiwg-berlin.mpg.de"; + EScidocBasicHandler hd = new EScidocBasicHandler(escidocServer, 8080, "dwinter", "fl0rian"); + + if (args.length>0){ + dateString=args[0]; + } + + if (!hd.isCurrent(itemString,dateString)){ + System.err.println("not the last version!"); + System.exit(0); + } + + + List existingRelations = hd.getIdsOfRelationFromObject(itemString,"http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember"); + + String contextMembers="http://escidoc.mpiwg-berlin.mpg.de:8080/ir/context/escidoc:55281/resources/members"; + + + + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.ERROR); + + PubmanFoxridgeIdentifierRelationModell pi = new PubmanFoxridgeIdentifierRelationModell(); + + Set identifiers = pi.getIdentifiersFromPubmanPath(contextMembers); + + Set ids = new HashSet(); + int counter=0; + for (String id: identifiers){ + + + String command=String.format("/ir/items?maximumRecords=1&operation=searchRetrieve&version=1.1&query=%%22%%2Fmd-records%%2Fmd-record%%2Fadmin%%2Fidentifier%%22%%3D%%22%s%%22", id); + //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:1001%22"; + //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:40001%22"; + //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:12001%22"; + System.out.println(command); + List objects = hd.getObjectsFromFilterResult(command, "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item"); + 
System.out.println("found"); + for (eSciDocXmlObject obj:objects){ + System.out.println("adding:"); + + String addObjId = obj.getESciDocId(); + if(existingRelations.contains(addObjId)){ + System.out.println("already in relations:"+addObjId); + } else { + + ids.add(addObjId.replace("/ir/item/", "")); + counter+=1; + System.out.println("adding:"+obj.getESciDocId()); + } + } + //if (counter>10) + // break; + + } + String addMemberXML=""; + for(String id: ids){ + addMemberXML+=""+ + ""+id+""+ + "http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember" + + " "; + + } + addMemberXML+=""; + + System.out.println("addmemberXML:"+addMemberXML); + File addFile = new File("/tmp/add.txt"); + FileWriter fw = new FileWriter(addFile); + fw.write(addMemberXML); + fw.close(); + + //HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161163/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML)); + //HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161164/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML)); + HttpResponse res = hd.eScidocPost(itemString+"/content-relations/add", EScidocBasicHandler.convertStringToStream(addMemberXML)); + System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent())); + } + + private Set getIdentifiersFromPubmanPath(String contextMembers) { + HttpGet get = new HttpGet(contextMembers); + DefaultHttpClient httpclient = new DefaultHttpClient(); + Set retSet = new HashSet(); + HttpResponse response; + try { + response = httpclient.execute(get); + } catch (ClientProtocolException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + return null; + } catch (IOException e1) { + // TODO Auto-generated catch block + e1.printStackTrace(); + return null; + } + if (response.getStatusLine().getStatusCode()>200){ + logger.error(contextMembers); + logger.error(response.getStatusLine().getReasonPhrase()); + return null; + } + + HttpEntity ent = 
response.getEntity(); + + SAXBuilder builder = new SAXBuilder(); + Document doc; + try { + doc = builder.build(ent.getContent()); + } catch (IllegalStateException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } + XPath xpath=null; + try { + String xpathString="/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item/metadataRecords:md-records/metadataRecords:md-record/publication:publication/dc:identifier"; + xpath = EScidocTools.getESciDocXpath(xpathString); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } + List nodes; + try { + nodes= xpath.selectNodes(doc); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } + File outAdd = new File("/tmp/outadded.txt"); + try { + FileWriter fw = new FileWriter(outAdd); + for (Element el: nodes){ + String text = el.getTextTrim(); + Pattern x = Pattern.compile("/library/([^/]*)"); + logger.debug("found:"+text); + Matcher m = x.matcher(text); + boolean matched = m.find(); + if (matched){ + String id = m.group(1); + logger.debug("adding:"+id); + retSet.add(id); + fw.write(id+"\n"); + } + + } + fw.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + System.exit(0); + } + return retSet; + } +} diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfo.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfo.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,111 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import 
org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Command-line repair tool: copies the child elements found under any
 * eterms:publishing-info of an item into every eterms:publishing-info of that item
 * that lacks a child of the same qualified name, then stores, submits and releases
 * the item.
 */
public class RepairPublishingInfo {

    /**
     * @param args args[0] = start record, args[1] = maximum number of records
     * @throws ESciDocXmlObjectException
     * @throws JDOMException
     * @throws IOException
     * @throws IllegalStateException
     */
    public static void main(String[] args) throws IllegalStateException,
            IOException, JDOMException, ESciDocXmlObjectException {

        Logger.getRootLogger().setLevel(Level.DEBUG);
        // NOTE(review): credentials are hard-coded in source; move them to configuration.
        EScidocBasicHandler connector = new EScidocBasicHandler(
                "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian");

        if (args.length < 2) {
            System.out.println("Usage: startrecord maximumrecords");
            System.exit(-1);
        }
        String start = args[0];
        String maxRecords = args[1];

        String command = "/ir/context/escidoc:38279/resources/members"
                + "?maximumRecords=" + maxRecords + "&startRecord=" + start;

        XPath childrenXPath = EScidocTools.getESciDocXpath("//eterms:publishing-info/*");
        XPath publishingInfoXPath = EScidocTools.getESciDocXpath("//eterms:publishing-info");

        for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(command, "//escidocItem:item")) {
            Document doc = obj.getDocument();
            boolean changed = false;

            // One template element per qualified name; a later element with the same name
            // replaces an earlier one. Insertion order is kept so that missing children
            // are appended in document order rather than in hash order.
            java.util.Map<String, Element> templates = new java.util.LinkedHashMap<String, Element>();
            @SuppressWarnings("unchecked") // JDOM 1.x returns a raw List
            List<Element> children = childrenXPath.selectNodes(doc);
            for (Element child : children) {
                templates.put(child.getQualifiedName(), child);
            }

            @SuppressWarnings("unchecked")
            List<Element> publishingInfos = publishingInfoXPath.selectNodes(doc);
            for (Element publishingInfo : publishingInfos) {
                for (Element template : templates.values()) {
                    XPath sameName = EScidocTools.getESciDocXpath("./" + template.getQualifiedName());
                    if (sameName.selectSingleNode(publishingInfo) == null) {
                        System.out.println("Adding:" + template.getQualifiedName());
                        publishingInfo.addContent((Element) template.clone());
                        changed = true;
                    }
                }
            }

            if (changed) {
                storeSubmitAndRelease(connector, obj,
                        "repairing publishing info", "repairing publishing info");
            }
        }
    }

    /**
     * Stores the modified item, submits it, fetches it again and releases the fetched
     * copy. The response bodies of submit and release are printed to stdout. When the
     * item cannot be fetched again, a message is printed and the release is skipped.
     */
    private static void storeSubmitAndRelease(EScidocBasicHandler connector, eSciDocXmlObject item,
            String submitComment, String releaseComment)
            throws IOException, JDOMException, ESciDocXmlObjectException {
        // NOTE(review): the result of updateItem is not checked (same as before) - confirm
        // whether a failed update should stop the submit/release sequence.
        connector.updateItem(item);
        System.out.println("Replaced:" + item.getESciDocId());

        HttpResponse submitted = connector.submitAnObject(item, submitComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(submitted.getEntity().getContent()));

        // Presumably the server-side state changes on submit, hence the fresh copy - TODO confirm.
        HttpEntity refreshed = connector.eScidocGet(item.getESciDocId()).getEntity();
        if (refreshed == null) {
            System.out.println("Can not retrieve:" + item.getESciDocId());
            return;
        }
        eSciDocXmlObject current = new eSciDocXmlObject(refreshed.getContent());

        HttpResponse released = connector.releaseAnObject(current, releaseComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(released.getEntity().getContent()));
    }
}
diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoDate.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoDate.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,100 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import
org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class RepairPublishingInfoDate { + + /** + * @param args + * @throws ESciDocXmlObjectException + * @throws JDOMException + * @throws IOException + * @throws IllegalStateException + */ + public static void main(String[] args) throws IllegalStateException, + IOException, JDOMException, ESciDocXmlObjectException { + + Logger logger = Logger.getRootLogger(); + logger.setLevel(Level.DEBUG); + EScidocBasicHandler connector = new EScidocBasicHandler( + "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl1rian"); + + if(args.length<2){ + System.out.println("Usage: startrecord maximumrecords"); + System.exit(-1); + } + String MAX_REC = args[1]; + String start = args[0]; + String objectXPath = "//escidocItem:item"; + + String query = "?maximumRecords=" + String.valueOf(MAX_REC) + + "&startRecord=" + String.valueOf(start); + String command = "/ir/context/escidoc:38279/resources/members"; + for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( + command + query, objectXPath)) { + + Document doc = obj.getDocument(); + Boolean changed=false; + XPath publishingInfoContentXPath = EScidocTools.getESciDocXpath("//publication:publication/dcterms:created"); + XPath publishingInfoContentXPathIssued = EScidocTools.getESciDocXpath("//publication:publication/dcterms:issued"); + XPath publishingInfoXPath = EScidocTools.getESciDocXpath("//publication:publication"); + @SuppressWarnings("unchecked") + HashMap piContentHash = new HashMap(); + + Element issued =(Element)publishingInfoContentXPathIssued.selectSingleNode(doc); + if (issued==null){ + + List piContents =publishingInfoContentXPath.selectNodes(doc); + for (Element piContent : piContents) { + + String creationDate=piContent.getText(); + + Element node = 
(Element)publishingInfoXPath.selectSingleNode(doc); + + //Element element = new Element("issued",EScidocTools.dcterms); + //element.setText(creationDate); + //node.addContent(element); + piContent.setName("issued"); + String piContentName=piContent.getName(); + changed=true; + }} + if (changed){ + + Boolean retVal = connector.updateItem(obj); + System.out.println("Replaced:"+obj.getESciDocId()); + HttpResponse retValu = connector.submitAnObject(obj, "repairing publishing info (pulisher in bookitem)"); + + System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + System.out.println("Can not retrieve:" + obj.getESciDocId()); + continue; + } + + HttpResponse reValue2 = connector.releaseAnObject(obj, "repairing publishing info (publisher in bookitem"); + System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent())); + + } + } + + } +} diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoDeleteWrong.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoDeleteWrong.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,115 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import 
de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class RepairPublishingInfoDeleteWrong { + + /** + * @param args + * @throws ESciDocXmlObjectException + * @throws JDOMException + * @throws IOException + * @throws IllegalStateException + */ + public static void main(String[] args) throws IllegalStateException, + IOException, JDOMException, ESciDocXmlObjectException { + + Logger logger = Logger.getRootLogger(); + logger.setLevel(Level.DEBUG); + EScidocBasicHandler connector = new EScidocBasicHandler( + "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian"); + + if(args.length<2){ + System.out.println("Usage: startrecord maximumrecords"); + System.exit(-1); + } + String MAX_REC = args[1]; + String start = args[0]; + String objectXPath = "//escidocItem:item"; + + String query = "?maximumRecords=" + String.valueOf(MAX_REC) + + "&startRecord=" + String.valueOf(start); + String command = "/ir/context/escidoc:38279/resources/members"; + for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( + command + query, objectXPath)) { + + Document doc = obj.getDocument(); + Boolean changed=false; + XPath publishingInfoContentXPath = EScidocTools.getESciDocXpath("//publication:publication[@type='http://purl.org/eprint/type/Book']/eterms:publishing-info/*"); + XPath publishingInfoXPath = EScidocTools.getESciDocXpath("//eterms:publishing-info"); + @SuppressWarnings("unchecked") + HashMap piContentHash = new HashMap(); + + + List piContents =publishingInfoContentXPath.selectNodes(doc); + for (Element piContent : piContents) { + + String piContentName=piContent.getName(); + if (piContentName.equals("publisher")){ + piContent.detach(); + System.out.println("delete:"+piContent.getQualifiedName()); + changed=true; + } + + + //piContentHash.put(piContentTag,piContent); + } + +// for (Element publishingInfo: (List)publishingInfoXPath.selectNodes(doc)){ +// for(String piContentTag: piContentHash.keySet()){ +// Element piContent = piContentHash.get(piContentTag); +// 
+// XPath xp= EScidocTools.getESciDocXpath("./"+piContent.getQualifiedName()); +// Element el=(Element)xp.selectSingleNode(publishingInfo); +// if(el==null){ +// System.out.println("Adding:"+piContent.getQualifiedName()); +// publishingInfo.addContent((Element)piContent.clone()); +// changed=true; +// } +// +// +// +// } +// +// } +// System.out.println(obj.printXML()); + if (changed){ + + Boolean retVal = connector.updateItem(obj); + System.out.println("Replaced:"+obj.getESciDocId()); + HttpResponse retValu = connector.submitAnObject(obj, "repairing publishing info (pulisher in bookitem)"); + + System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + System.out.println("Can not retrieve:" + obj.getESciDocId()); + continue; + } + + HttpResponse reValue2 = connector.releaseAnObject(obj, "repairing publishing info (publisher in bookitem"); + System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent())); + + } + } + + } +} diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoOrder.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoOrder.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,118 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import 
de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class RepairPublishingInfoOrder { + + /** + * @param args + * @throws ESciDocXmlObjectException + * @throws JDOMException + * @throws IOException + * @throws IllegalStateException + */ + public static void main(String[] args) throws IllegalStateException, + IOException, JDOMException, ESciDocXmlObjectException { + + Logger logger = Logger.getRootLogger(); + logger.setLevel(Level.DEBUG); + EScidocBasicHandler connector = new EScidocBasicHandler( + "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian"); + + if(args.length<2){ + System.out.println("Usage: startrecord maximumrecords"); + System.exit(-1); + } + String MAX_REC = args[1]; + String start = args[0]; + String objectXPath = "//escidocItem:item"; + + String query = "?maximumRecords=" + String.valueOf(MAX_REC) + + "&startRecord=" + String.valueOf(start); + String command = "/ir/context/escidoc:38279/resources/members"; + for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( + command + query, objectXPath)) { + + Document doc = obj.getDocument(); + Boolean changed=false; + //XPath publishingInfoContentXPath = EScidocTools.getESciDocXpath("//source:source/eterms:publishing-info/*"); + //XPath publishingInfoXPath = EScidocTools.getESciDocXpath("//source:source/eterms:publishing-info"); + XPath publishingInfoContentXPath = EScidocTools.getESciDocXpath("//publication:publication/eterms:publishing-info/*"); + XPath publishingInfoXPath = EScidocTools.getESciDocXpath("//publication:publication/eterms:publishing-info"); + + + @SuppressWarnings("unchecked") + HashMap piContentHash = new HashMap(); + + + List piContents =publishingInfoContentXPath.selectNodes(doc); + for (Element piContent : piContents) { + + String piContentTag=piContent.getName(); + + + piContent.detach(); + piContentHash.put(piContentTag,piContent); + } + + Element infoXpath = 
(Element)publishingInfoXPath.selectSingleNode(doc); + Element addPlace= piContentHash.get("place"); + Element addEdition= piContentHash.get("edition"); + Element addPublisher= piContentHash.get("publisher"); + if(addPublisher!=null){ + System.out.println("publisher"); + infoXpath.addContent(addPublisher); + changed=true; + } + if(addPlace!=null){ + System.out.println("place"); + infoXpath.addContent(addPlace); + } + if(addEdition!=null){ + System.out.println("edition"); + infoXpath.addContent(addEdition); + changed=true; + + } + + //System.out.println(obj.printXML()); + if (changed){ + + Boolean retVal = connector.updateItem(obj); + System.out.println("Replaced:"+obj.getESciDocId()); + HttpResponse retValu = connector.submitAnObject(obj, "repairing publishing info order"); + + System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + System.out.println("Can not retrieve:" + obj.getESciDocId()); + continue; + } + + HttpResponse reValue2 = connector.releaseAnObject(obj, "repairing publishing info order"); + System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent())); + + } + } + + } +} diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoPlace.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoPlace.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,129 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; 
+import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Command-line repair tool: copies the children of the publication-level
 * eterms:publishing-info of Book items into the source-level
 * eterms:publishing-info wherever no element of the same qualified name exists
 * there yet, then stores, submits and releases the item.
 */
public class RepairPublishingInfoPlace {

    /**
     * @param args args[0] = start record, args[1] = maximum number of records
     * @throws ESciDocXmlObjectException
     * @throws JDOMException
     * @throws IOException
     * @throws IllegalStateException
     */
    public static void main(String[] args) throws IllegalStateException,
            IOException, JDOMException, ESciDocXmlObjectException {

        Logger.getRootLogger().setLevel(Level.DEBUG);
        // NOTE(review): credentials are hard-coded in source; move them to configuration.
        EScidocBasicHandler connector = new EScidocBasicHandler(
                "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian");

        if (args.length < 2) {
            System.out.println("Usage: startrecord maximumrecords");
            System.exit(-1);
        }
        String start = args[0];
        String maxRecords = args[1];

        String command = "/ir/context/escidoc:38279/resources/members"
                + "?maximumRecords=" + maxRecords + "&startRecord=" + start;

        String targetPath = "//source:source/eterms:publishing-info";
        XPath sourceChildrenXPath = EScidocTools.getESciDocXpath(
                "//publication:publication[@type='http://purl.org/eprint/type/Book']/eterms:publishing-info/*");
        XPath targetXPath = EScidocTools.getESciDocXpath(targetPath);

        for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(command, "//escidocItem:item")) {
            Document doc = obj.getDocument();
            boolean changed = false;

            // The first matching target element receives all copies; it does not change
            // while children are added to it, so it is looked up once per item.
            Element target = (Element) targetXPath.selectSingleNode(doc);

            @SuppressWarnings("unchecked") // JDOM 1.x returns a raw List
            List<Element> sourceChildren = sourceChildrenXPath.selectNodes(doc);
            for (Element child : sourceChildren) {
                String qualifiedName = child.getQualifiedName();
                System.out.println("add:" + qualifiedName);

                XPath existing = EScidocTools.getESciDocXpath(targetPath + "/" + qualifiedName);
                if (existing.selectSingleNode(doc) != null) {
                    continue; // some target already has an element of this name
                }
                System.out.println("Adding:" + qualifiedName);
                if (target == null) {
                    System.out.println(" Cannot add");
                } else {
                    target.addContent((Element) child.clone());
                    changed = true;
                }
            }

            if (changed) {
                // NOTE(review): the comments mention only the publisher although every
                // missing child is copied; they are kept byte-for-byte.
                storeSubmitAndRelease(connector, obj,
                        "repairing publishing info (pulisher in bookitem)",
                        "repairing publishing info (publisher in bookitem");
            }
        }
    }

    /**
     * Stores the modified item, submits it, fetches it again and releases the fetched
     * copy. The response bodies of submit and release are printed to stdout. When the
     * item cannot be fetched again, a message is printed and the release is skipped.
     */
    private static void storeSubmitAndRelease(EScidocBasicHandler connector, eSciDocXmlObject item,
            String submitComment, String releaseComment)
            throws IOException, JDOMException, ESciDocXmlObjectException {
        // NOTE(review): the result of updateItem is not checked (same as before) - confirm
        // whether a failed update should stop the submit/release sequence.
        connector.updateItem(item);
        System.out.println("Replaced:" + item.getESciDocId());

        HttpResponse submitted = connector.submitAnObject(item, submitComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(submitted.getEntity().getContent()));

        // Presumably the server-side state changes on submit, hence the fresh copy - TODO confirm.
        HttpEntity refreshed = connector.eScidocGet(item.getESciDocId()).getEntity();
        if (refreshed == null) {
            System.out.println("Can not retrieve:" + item.getESciDocId());
            return;
        }
        eSciDocXmlObject current = new eSciDocXmlObject(refreshed.getContent());

        HttpResponse released = connector.releaseAnObject(current, releaseComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(released.getEntity().getContent()));
    }
}
diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoPlaceMain.java ---
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoPlaceMain.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,129 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Command-line repair tool: copies the children of the source-level
 * eterms:publishing-info of Book items into the publication-level
 * eterms:publishing-info wherever no element of the same qualified name exists
 * there yet, then stores, submits and releases the item.
 */
public class RepairPublishingInfoPlaceMain {

    /**
     * @param args args[0] = start record, args[1] = maximum number of records
     * @throws ESciDocXmlObjectException
     * @throws JDOMException
     * @throws IOException
     * @throws IllegalStateException
     */
    public static void main(String[] args) throws IllegalStateException,
            IOException, JDOMException, ESciDocXmlObjectException {

        Logger.getRootLogger().setLevel(Level.DEBUG);
        // NOTE(review): credentials are hard-coded in source; move them to configuration.
        EScidocBasicHandler connector = new EScidocBasicHandler(
                "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian");

        if (args.length < 2) {
            System.out.println("Usage: startrecord maximumrecords");
            System.exit(-1);
        }
        String start = args[0];
        String maxRecords = args[1];

        String command = "/ir/context/escidoc:38279/resources/members"
                + "?maximumRecords=" + maxRecords + "&startRecord=" + start;

        String targetPath = "//publication:publication/eterms:publishing-info";
        XPath sourceChildrenXPath = EScidocTools.getESciDocXpath(
                "//publication:publication[@type='http://purl.org/eprint/type/Book']/source:source/eterms:publishing-info/*");
        XPath targetXPath = EScidocTools.getESciDocXpath(targetPath);

        for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(command, "//escidocItem:item")) {
            Document doc = obj.getDocument();
            boolean changed = false;

            // The first matching target element receives all copies; it does not change
            // while children are added to it, so it is looked up once per item.
            Element target = (Element) targetXPath.selectSingleNode(doc);

            @SuppressWarnings("unchecked") // JDOM 1.x returns a raw List
            List<Element> sourceChildren = sourceChildrenXPath.selectNodes(doc);
            for (Element child : sourceChildren) {
                String qualifiedName = child.getQualifiedName();
                System.out.println("add:" + qualifiedName);

                XPath existing = EScidocTools.getESciDocXpath(targetPath + "/" + qualifiedName);
                if (existing.selectSingleNode(doc) != null) {
                    continue; // some target already has an element of this name
                }
                System.out.println("Adding:" + qualifiedName);
                if (target == null) {
                    System.out.println(" Cannot add");
                } else {
                    target.addContent((Element) child.clone());
                    changed = true;
                }
            }

            if (changed) {
                // NOTE(review): the comments mention only the publisher although every
                // missing child is copied; they are kept byte-for-byte.
                storeSubmitAndRelease(connector, obj,
                        "repairing publishing info (pulisher in bookitem)",
                        "repairing publishing info (publisher in bookitem");
            }
        }
    }

    /**
     * Stores the modified item, submits it, fetches it again and releases the fetched
     * copy. The response bodies of submit and release are printed to stdout. When the
     * item cannot be fetched again, a message is printed and the release is skipped.
     */
    private static void storeSubmitAndRelease(EScidocBasicHandler connector, eSciDocXmlObject item,
            String submitComment, String releaseComment)
            throws IOException, JDOMException, ESciDocXmlObjectException {
        // NOTE(review): the result of updateItem is not checked (same as before) - confirm
        // whether a failed update should stop the submit/release sequence.
        connector.updateItem(item);
        System.out.println("Replaced:" + item.getESciDocId());

        HttpResponse submitted = connector.submitAnObject(item, submitComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(submitted.getEntity().getContent()));

        // Presumably the server-side state changes on submit, hence the fresh copy - TODO confirm.
        HttpEntity refreshed = connector.eScidocGet(item.getESciDocId()).getEntity();
        if (refreshed == null) {
            System.out.println("Can not retrieve:" + item.getESciDocId());
            return;
        }
        eSciDocXmlObject current = new eSciDocXmlObject(refreshed.getContent());

        HttpResponse released = connector.releaseAnObject(current, releaseComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(released.getEntity().getContent()));
    }
}
diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceAffiliation.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceAffiliation.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,101 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Command-line repair tool: for every creator person whose first organization has
 * a title starting with "Max Planck Society", sets the title to "Max Planck
 * Institute for the History of Science" and the organization identifier to
 * "escidoc:14002", then stores, submits and releases the item.
 */
public class ReplaceAffiliation {

    /**
     * @param args args[0] = start record, args[1] = maximum number of records
     * @throws ESciDocXmlObjectException
     * @throws JDOMException
     * @throws IOException
     * @throws IllegalStateException
     */
    public static void main(String[] args) throws IllegalStateException,
            IOException, JDOMException, ESciDocXmlObjectException {

        Logger.getRootLogger().setLevel(Level.DEBUG);
        // NOTE(review): credentials are hard-coded in source, and the password differs from
        // the one used by most sibling tools in this changeset - confirm which one is current.
        EScidocBasicHandler connector = new EScidocBasicHandler(
                "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl1rian");

        if (args.length < 2) {
            System.out.println("Usage: startrecord maximumrecords");
            System.exit(-1);
        }
        String start = args[0];
        String maxRecords = args[1];

        String command = "/ir/context/escidoc:38279/resources/members"
                + "?maximumRecords=" + maxRecords + "&startRecord=" + start;

        XPath personXPath = EScidocTools.getESciDocXpath("//eterms:creator/person:person");
        XPath organizationXPath = EScidocTools.getESciDocXpath("./organization:organization");
        XPath titleXPath = EScidocTools.getESciDocXpath("./dc:title");
        XPath identifierXPath = EScidocTools.getESciDocXpath("./dc:identifier");

        for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(command, "//escidocItem:item")) {
            Document doc = obj.getDocument();
            boolean changed = false;

            @SuppressWarnings("unchecked") // JDOM 1.x returns a raw List
            List<Element> persons = personXPath.selectNodes(doc);
            for (Element person : persons) {
                // Only the first organization of each person is examined.
                Element organization = (Element) organizationXPath.selectSingleNode(person);
                if (organization == null) {
                    continue;
                }
                Element title = (Element) titleXPath.selectSingleNode(organization);
                if (title == null || !title.getTextTrim().startsWith("Max Planck Society")) {
                    continue;
                }
                Element identifier = (Element) identifierXPath.selectSingleNode(organization);
                if (identifier == null) {
                    // Title and identifier are changed together or not at all; without this
                    // guard a missing identifier would end the whole run with an exception.
                    System.out.println("No organization identifier, skipped in:" + obj.getESciDocId());
                    continue;
                }
                title.setText("Max Planck Institute for the History of Science");
                identifier.setText("escidoc:14002");
                changed = true;
            }

            if (changed) {
                storeSubmitAndRelease(connector, obj,
                        "changed affiliation of persons", "changed affiliation of persons");
            }
        }
    }

    /**
     * Stores the modified item, submits it, fetches it again and releases the fetched
     * copy. The response bodies of submit and release are printed to stdout. When the
     * item cannot be fetched again, a message is printed and the release is skipped.
     */
    private static void storeSubmitAndRelease(EScidocBasicHandler connector, eSciDocXmlObject item,
            String submitComment, String releaseComment)
            throws IOException, JDOMException, ESciDocXmlObjectException {
        // NOTE(review): the result of updateItem is not checked (same as before) - confirm
        // whether a failed update should stop the submit/release sequence.
        connector.updateItem(item);
        System.out.println("Replaced:" + item.getESciDocId());

        HttpResponse submitted = connector.submitAnObject(item, submitComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(submitted.getEntity().getContent()));

        // Presumably the server-side state changes on submit, hence the fresh copy - TODO confirm.
        HttpEntity refreshed = connector.eScidocGet(item.getESciDocId()).getEntity();
        if (refreshed == null) {
            System.out.println("Can not retrieve:" + item.getESciDocId());
            return;
        }
        eSciDocXmlObject current = new eSciDocXmlObject(refreshed.getContent());

        HttpResponse released = connector.releaseAnObject(current, releaseComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(released.getEntity().getContent()));
    }
}
diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceConeIds.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceConeIds.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,90 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Command-line repair tool: rewrites every dc:identifier that starts with
 * "http://pubman.mpiwg-berlin.mpg.de:8080" so that the ":8080" port is dropped,
 * then stores, submits and releases the item.
 */
public class ReplaceConeIds {

    /**
     * @param args args[0] = start record, args[1] = maximum number of records
     * @throws ESciDocXmlObjectException
     * @throws JDOMException
     * @throws IOException
     * @throws IllegalStateException
     */
    public static void main(String[] args) throws IllegalStateException,
            IOException, JDOMException, ESciDocXmlObjectException {

        Logger.getRootLogger().setLevel(Level.DEBUG);
        // NOTE(review): credentials are hard-coded in source; move them to configuration.
        EScidocBasicHandler connector = new EScidocBasicHandler(
                "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian");

        if (args.length < 2) {
            System.out.println("Usage: startrecord maximumrecords");
            System.exit(-1);
        }
        String start = args[0];
        String maxRecords = args[1];

        String command = "/ir/context/escidoc:38279/resources/members"
                + "?maximumRecords=" + maxRecords + "&startRecord=" + start;

        String oldPrefix = "http://pubman.mpiwg-berlin.mpg.de:8080";
        String newPrefix = "http://pubman.mpiwg-berlin.mpg.de";
        XPath idXPath = EScidocTools.getESciDocXpath("//dc:identifier");

        for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(command, "//escidocItem:item")) {
            Document doc = obj.getDocument();
            boolean changed = false;

            @SuppressWarnings("unchecked") // JDOM 1.x returns a raw List
            List<Element> identifiers = idXPath.selectNodes(doc);
            for (Element identifier : identifiers) {
                String idString = identifier.getTextTrim();
                if (idString.startsWith(oldPrefix)) {
                    // replace() substitutes every occurrence, not only the leading one.
                    identifier.setText(idString.replace(oldPrefix, newPrefix));
                    changed = true;
                }
            }

            if (changed) {
                storeSubmitAndRelease(connector, obj,
                        "changed cone identifiers", "changed cone identifiers");
            }
        }
    }

    /**
     * Stores the modified item, submits it, fetches it again and releases the fetched
     * copy. The response bodies of submit and release are printed to stdout. When the
     * item cannot be fetched again, a message is printed and the release is skipped.
     */
    private static void storeSubmitAndRelease(EScidocBasicHandler connector, eSciDocXmlObject item,
            String submitComment, String releaseComment)
            throws IOException, JDOMException, ESciDocXmlObjectException {
        // NOTE(review): the result of updateItem is not checked (same as before) - confirm
        // whether a failed update should stop the submit/release sequence.
        connector.updateItem(item);
        System.out.println("Replaced:" + item.getESciDocId());

        HttpResponse submitted = connector.submitAnObject(item, submitComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(submitted.getEntity().getContent()));

        // Presumably the server-side state changes on submit, hence the fresh copy - TODO confirm.
        HttpEntity refreshed = connector.eScidocGet(item.getESciDocId()).getEntity();
        if (refreshed == null) {
            System.out.println("Can not retrieve:" + item.getESciDocId());
            return;
        }
        eSciDocXmlObject current = new eSciDocXmlObject(refreshed.getContent());

        HttpResponse released = connector.releaseAnObject(current, releaseComment);
        System.out.println(EScidocBasicHandler.convertStreamToString(released.getEntity().getContent()));
    }
}
diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceLanguage.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceLanguage.java Mon May 14 09:58:45 2012 +0200 @@ -0,0 +1,112 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger;
+import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class ReplaceLanguage { + + /** + * @param args + * @throws ESciDocXmlObjectException + * @throws JDOMException + * @throws IOException + * @throws IllegalStateException + */ + public static void main(String[] args) throws IllegalStateException, + IOException, JDOMException, ESciDocXmlObjectException { + + HashMap replaceLang = new HashMap(); + replaceLang.put("English", "eng"); + replaceLang.put("German", "deu"); + replaceLang.put("Spanish", "spa"); + replaceLang.put("Chinese", "zho"); + replaceLang.put("Arabic", "ara"); + replaceLang.put("Catalan", "cat"); + replaceLang.put("Danish", "dan"); + replaceLang.put("Dutch", "nld"); + replaceLang.put("French", "fra"); + replaceLang.put("Hebrew", "heb"); + replaceLang.put("Hungarian", "hun"); + replaceLang.put("Icelandic", "isl"); + replaceLang.put("Italian", "ita"); + replaceLang.put("Japanese", "jpn"); + replaceLang.put("Lithunian", "lit"); + replaceLang.put("Norwegian", "nor"); + replaceLang.put("Polish", "pol"); + replaceLang.put("Portugese", "por"); + replaceLang.put("Russian", "rus"); + replaceLang.put("Swedish", "swe"); + replaceLang.put("Vietnamese", "vie"); + + + + + Logger logger = Logger.getRootLogger(); + logger.setLevel(Level.DEBUG); + EScidocBasicHandler connector = new EScidocBasicHandler( + "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian"); + + if(args.length<2){ + System.out.println("Usage: startrecord maximumrecords"); + System.exit(-1); + } + String MAX_REC = args[1]; + String start = args[0]; + String objectXPath = "//escidocItem:item"; + + String query = "?maximumRecords=" + String.valueOf(MAX_REC) + + "&startRecord=" + 
String.valueOf(start); + String command = "/ir/context/escidoc:38279/resources/members"; + for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( + command + query, objectXPath)) { + + Document doc = obj.getDocument(); + Boolean changed=false; + XPath idXPath = EScidocTools.getESciDocXpath("//dc:language"); + @SuppressWarnings("unchecked") + List identifiers = idXPath.selectNodes(doc); + for (Element identifier : identifiers) { + String idString = identifier.getTextTrim(); + identifier.setText(replaceLang.get(idString)); + changed=true; + } + + if (changed){ + + Boolean retVal = connector.updateItem(obj); + System.out.println("Replaced:"+obj.getESciDocId()); + HttpResponse retValu = connector.submitAnObject(obj, "changed language code"); + + System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + System.out.println("Can not retrieve:" + obj.getESciDocId()); + continue; + } + + HttpResponse reValue2 = connector.releaseAnObject(obj, "changed language code"); + System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent())); + + } + } + + } +} diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/Tools/SendFileToeSciDoc.java --- a/src/de/mpiwg/itgroup/eSciDoc/Tools/SendFileToeSciDoc.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/SendFileToeSciDoc.java Mon May 14 09:58:45 2012 +0200 @@ -7,13 +7,15 @@ import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; +import org.apache.log4j.BasicConfigurator; public class SendFileToeSciDoc { //private static final String ESCIDOC_SERVER = "xserve07.mpiwg-berlin.mpg.de"; - private static final String ESCIDOC_SERVER = "escidoc-test.mpiwg-berlin.mpg.de"; + private static final String 
ESCIDOC_SERVER = "escidoc.mpiwg-berlin.mpg.de"; + //private static final String ESCIDOC_SERVER = "escidoc-test.mpiwg-berlin.mpg.de"; /** * @param args @@ -27,6 +29,7 @@ return; } + BasicConfigurator.configure(); EScidocBasicHandler handler = new EScidocBasicHandler(ESCIDOC_SERVER,8080,args[2],args[3]); URL url; HttpResponse ret; @@ -38,7 +41,8 @@ return; } try { - ret = handler.eScidocPut(args[0], url); + //ret = handler.eScidocDelete(args[0]); + ret = handler.eScidocPost(args[0], url); } catch (ClientProtocolException e) { // TODO Auto-generated catch block e.printStackTrace(); diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java --- a/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java Mon May 14 09:58:45 2012 +0200 @@ -1,5 +1,7 @@ package de.mpiwg.itgroup.eSciDoc.echoObjects; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; @@ -25,9 +27,23 @@ import org.apache.http.protocol.BasicHttpContext; import org.apache.http.protocol.ExecutionContext; import org.apache.http.protocol.HttpContext; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Attribute; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.input.SAXBuilder; +import org.jdom.output.XMLOutputter; +import org.jdom.xpath.XPath; + + +import com.sun.org.apache.xalan.internal.xsltc.dom.DOMWSFilter; +import com.sun.org.apache.xerces.internal.parsers.SAXParser; import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; import de.mpiwg.itgroup.metadataManager.pid.DCTransformer; public class ECHORessource extends ECHOObject{ @@ -244,4 +260,85 @@ } + // schreibe die escidoc:id in die 
index.meta + + public void writeEsciDocIDToIndexMeta(eSciDocXmlObject eSciDocXmlObject) { + String txt; + try { + txt = eSciDocXmlObject.getESciDocObjId(); + writeEsciDocIDToIndexMeta(txt); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + public void writeEsciDocIDToIndexMeta(String objid) { + String driType="escidoc-test"; + String txt; + Element result; + try { + txt = objid; + + URL url = new URL(metaData); + InputStream is = url.openStream(); + Document doc = new SAXBuilder().build(is); + XPath xp = XPath.newInstance("//meta/dri[@type=\""+driType+"\"]"); + result = (Element)xp.selectSingleNode(doc); + + if (result!=null){ + result.setText(txt); + } else { + XPath xpMeta = XPath.newInstance("//meta"); + Element metaTag = (Element)xpMeta.selectSingleNode(doc); + + if (metaTag==null){ + return; + } else { + + Element dri = new Element("dri"); + dri.setAttribute("type",driType); + dri.setText(txt); + metaTag.addContent(dri); + } + } + + XMLOutputter xo = new XMLOutputter(); + + + + //String outpath ="/tmp/out/"+archivePath; + String outpath =archivePath; + //File parent = new File(outpath); + //parent.mkdirs(); + + File oldFile = new File(outpath+"/index.meta"); + boolean f = oldFile.renameTo(new File(outpath+"/index.meta.old")); + if (!f){ + Logger lg = Logger.getLogger("transformerLogger"); + lg.error("cannot: write "+outpath+"/index.meta.old"); + lg.error("cannot: but will proceed to write new index.meta"); + + } + FileOutputStream out = new FileOutputStream(outpath+"/index.meta"); + xo.output(doc, out); + + Logger lg = Logger.getLogger("transformerLogger"); + lg.info("changed:"+outpath+" -- added escidoc: "+txt); + out.close(); + + } catch (MalformedURLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + + 
} + + } diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java --- a/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java Mon May 14 09:58:45 2012 +0200 @@ -21,6 +21,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.Enumeration; import java.util.Iterator; import java.util.Stack; @@ -28,6 +29,7 @@ +import org.apache.log4j.Logger; import org.jdom.Document; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; @@ -45,15 +47,27 @@ private File rootFolder; private File currentFolder; - private Stack stack; - + private Stack stack; + private ArrayListfilter; //Array of paths which shouldn'T be indexed - public IndexMetaIterator(File rootFolder){ + public IndexMetaIterator(File rootFolder) throws IOException{ + + filter = new ArrayList(); + filter.add("/mpiwg/online/permanent/SudanRockArt"); // TODO: make this configurable + this.rootFolder=rootFolder; this.currentFolder=rootFolder; - this.stack = new Stack(); - for (File f:rootFolder.listFiles()){ - stack.push(f); + this.stack = new Stack(); + + for (String f:rootFolder.list()){ + String fn = rootFolder.getCanonicalPath()+"/"+f; + if (!filter.contains(fn)){ + if (!f.equals("")){ // FIXME some filesystems (sshfs?) gives empty filenames if the path contains special characters. + stack.push(fn);} + else { + Logger.getLogger("notAddedFilesLogger").info("Folder -" +fn+" contains files with charakters I cannot read!" 
); + } + } } } @Override @@ -65,24 +79,48 @@ @Override public ECHOObject next() { // TODO Auto-generated method stub - File nextFile = stack.pop(); - while(!nextFile.getName().endsWith(".meta") && !stack.isEmpty()){ - System.out.println("CHECK_________"+nextFile.getName()); - if(!nextFile.getName().equals("pageimg")){ //skip pageimg - if(nextFile.isDirectory()){ - for (File f:nextFile.listFiles()){ - stack.push(f); + String nextFile = stack.pop(); + while(!nextFile.endsWith(".meta") && !stack.isEmpty()){ + System.out.println("CHECK_________"+nextFile); + + + if(!nextFile.equals("pageimg")){ //skip pageimg + + File nf = new File(nextFile); + + if(nf.isDirectory()){ + for (String f:nf.list()){ + String fn; + try { + if (!f.startsWith(".")){ + fn = nf.getCanonicalPath()+"/"+f; + if (!filter.contains(fn)){ + if (!f.equals("")) {// FIXME some filesystems (sshfs?) gives empty filenames if the path contains special characters. + stack.push(fn);} + else { + Logger.getLogger("notAddedFilesLogger").info("Folder -" +fn+" contains files with characters I cannot read!" ); + } + + } + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } } } + nextFile = stack.pop(); + } - if (!nextFile.getName().endsWith(".meta")) //der letzte Eintrag muss noch gretrennt getestet werden. + if (!nextFile.endsWith(".meta")) //der letzte Eintrag muss noch gretrennt getestet werden. nextFile = null; System.out.println("FOUND:"+nextFile); try { if (nextFile!=null) - return createECHOObject(nextFile); + return createECHOObject(new File(nextFile)); } catch (JDOMException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -103,7 +141,7 @@ FoxridgeRessource er = new FoxridgeRessource(nextFile.getParentFile().getName(),nextFile.getParentFile().getAbsolutePath(),null); er.metaData = er.correctML(nextFile.getAbsolutePath()); - er.pid=er.getPid(); + //er.pid=er.getPid(); //TODO: not needed any more? 
er.echoUrl=er.metaData; //TODO find a better solution, what to present here, z.b. texttool-tag auswerten. return er; } catch (Exception e) { diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaWalker.java --- a/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaWalker.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaWalker.java Mon May 14 09:58:45 2012 +0200 @@ -1,6 +1,7 @@ package de.mpiwg.itgroup.eSciDoc.foxridge; import java.io.File; +import java.io.IOException; import java.util.Iterator; import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; @@ -14,7 +15,13 @@ } public Iterator iterator() { - return new IndexMetaIterator(rootFolder); + try { + return new IndexMetaIterator(rootFolder); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return null; + } } } diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java --- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Mon May 14 09:58:45 2012 +0200 @@ -40,7 +40,7 @@ protected Importer importer; protected EScidocBasicHandler connector; protected Transformer transformer; - + private String context; private Logger addedFile = Logger.getLogger("addedFilesLogger"); private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); @@ -79,106 +79,19 @@ try { ECHOObject old; try { - old = connector.alreadyExists( - "/md-records/md-record/admin/archivePath", - ((ECHORessource) obj).archivePath, context); - } catch (ObjectNotUniqueError e) { - // TODO Auto-generated catch block - e.printStackTrace(); - continue; - } - if (old!=null) { - logger.debug("already exist:" - + ((ECHORessource) obj).archivePath); - handleExistingObject(obj,old); - continue; - } - } catch (ConnectorException e) { - logger.debug("already exist error:"); - e.printStackTrace(); 
- continue; - } - } - - obj.context = context; - - String contid = connector.getIDfromPID(obj.pid, context); - if (contid != null) { - System.out.println("------- belongsTo:" + contid); - } else { - - eSciDocXmlObject escidocItem = transformer.transform(obj); - - - try { - logger.info(escidocItem.printXML()); - // TODO write PID to back to echo-obj - Boolean result = connector.createItem(escidocItem); - if (result) { - addedObjects.add(escidocItem.getESciDocId()); - addedFile.debug(escidocItem.getESciDocId() + "\n"); - - } else { - notAddedObjects.add(obj.echoUrl); - notAddedFile.debug(obj.echoUrl); - + old = connector.alreadyExists( + "admin.archivePath", + ((ECHORessource) obj).archivePath, context,"="); + if (old==null){ //FIXME Problem bei der Erfassungder Metadaten sollte eigentlich nicht vorkommen! + old = connector.alreadyExists( + "admin.archivePath", + ((ECHORessource) obj).archivePath.replace("/mpiwg/online/permanent", "/Volumes/online/permanent"), context,"="); } - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - throw new ESciDocXmlObjectException(); - } catch (JDOMException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - throw new ESciDocXmlObjectException(); - } - - } - } - if (logger.getLevel() == Level.DEBUG) { - for (String addedObject : addedObjects) { - logger.debug(addedObject); - } - } - - return true; - } - - - /** Read objects into eScidoc or updates the objects if indexMeta has changed. 
- * @param type restrict the imported objects to a specific type, possible types should be defined in - * the given importer @see {@link #importer} - * @return - * @throws ConnectorException - * @throws TransformerException - * @throws ESciDocXmlObjectException - */ - public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException { - ArrayList addedObjects = new ArrayList(); - ArrayList notAddedObjects = new ArrayList(); - for (ECHOObject obj : importer.getObjectList(type)) { - - if (ECHORessource.class.isInstance(obj)) { - try { - - // checke zuerst, ob die MD5 schon im publiziert Teil der Metadaten ist, dann tue nichts - String md5 = ((ECHORessource) obj).getIndexMetaMD5onServer(); - //List results = connector.getObjectsFromSearch("escidoc.component.checksum",md5); - List results = connector.getObjectsFromSearch("escidoc.component.checksum",md5,context); - if (results.size()>0){ //index.meta schon abgespeichert - notAddedFile.debug("alredy exist:"+obj.echoUrl); - continue; - } - - - - ECHOObject old; - try { - - old = connector.alreadyExists( - "/md-records/md-record/admin/archivePath", - ((ECHORessource) obj).archivePath, context); + if (old==null){ + old = connector.alreadyExists( + "admin.archivePath", + ((ECHORessource) obj).archivePath.replace("/mpiwg/online/experimental", "/Volumes/online/experimental"), context,"="); + } } catch (ObjectNotUniqueError e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -205,32 +118,32 @@ } else { eSciDocXmlObject escidocItem = transformer.transform(obj); - - - try { - logger.info(escidocItem.printXML()); - // TODO write PID to back to echo-obj - Boolean result = connector.createItem(escidocItem); - if (result) { - addedObjects.add(escidocItem.getESciDocId()); - addedFile.debug(escidocItem.getESciDocId() + "\n"); + + + try { + logger.info(escidocItem.printXML()); + // TODO write PID to back to echo-obj + Boolean result = 
connector.createItem(escidocItem); + if (result) { + addedObjects.add(escidocItem.getESciDocId()); + addedFile.debug(escidocItem.getESciDocId() + "\n"); - } else { - notAddedObjects.add(obj.echoUrl); - notAddedFile.debug(obj.echoUrl); - - } - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - throw new ESciDocXmlObjectException(); - } catch (JDOMException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - throw new ESciDocXmlObjectException(); + } else { + notAddedObjects.add(obj.echoUrl); + notAddedFile.debug(obj.echoUrl); + } - + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } + } } if (logger.getLevel() == Level.DEBUG) { @@ -242,8 +155,123 @@ return true; } - - + + /** Read objects into eScidoc or updates the objects if indexMeta has changed. 
+ * @param type restrict the imported objects to a specific type, possible types should be defined in + * the given importer @see {@link #importer} + * @return + * @throws ConnectorException + * @throws TransformerException + * @throws ESciDocXmlObjectException + */ + public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException { + ArrayList addedObjects = new ArrayList(); + ArrayList notAddedObjects = new ArrayList(); + for (ECHOObject obj : importer.getObjectList(type)) { + if (obj==null) + continue; + if (ECHORessource.class.isInstance(obj)) { + try { + + // checke zuerst, ob die MD5 schon im publiziert Teil der Metadaten ist, dann tue nichts + String md5 = ((ECHORessource) obj).getIndexMetaMD5onServer(); + //List results = connector.getObjectsFromSearch("escidoc.component.checksum",md5); + List results = connector.getObjectsFromSearch("escidoc.component.checksum",md5,context); + if (results.size()>0){ //index.meta schon abgespeichert + notAddedFile.debug("alredy exist:"+obj.echoUrl); + + ((ECHORessource) obj).writeEsciDocIDToIndexMeta(results.get(0)); + + continue; + } + + + + + ECHOObject old; + try { + + old = connector.alreadyExists( + "admin.archivePath", + ((ECHORessource) obj).archivePath, context,"="); + if (old==null){ //FIXME Problem bei der Erfassungder Metadaten sollte eigentlich nicht vorkommen! 
+ old = connector.alreadyExists( + "admin.archivePath", + ((ECHORessource) obj).archivePath.replace("/mpiwg/online/permanent", "/Volumes/online_permanent"), context,"="); + } + if (old==null){ + old = connector.alreadyExists( + "admin.archivePath", + ((ECHORessource) obj).archivePath.replace("/mpiwg/online/experimental", "/Volumes/online_experimental"), context,"="); + } + } catch (ObjectNotUniqueError e) { + // TODO Auto-generated catch block + e.printStackTrace(); + continue; + } + if (old!=null) { + logger.debug("already exist:" + + ((ECHORessource) obj).archivePath); + handleExistingObject(obj,old); + continue; + } + } catch (ConnectorException e) { + logger.debug("already exist error:"); + e.printStackTrace(); + continue; + } + } + + obj.context = context; + + String contid=null; + + if (obj.pid!=null) + contid = connector.getIDfromPID(obj.pid, context); + if (contid != null) { + System.out.println("------- belongsTo:" + contid); + } else { + + eSciDocXmlObject escidocItem = transformer.transform(obj); + + + try { + logger.info(escidocItem.printXML()); + // TODO write PID to back to echo-obj + Boolean result = connector.createItem(escidocItem); + if (result) { + addedObjects.add(escidocItem.getESciDocId()); + addedFile.debug(escidocItem.getESciDocId() + "\n"); + + } else { + notAddedObjects.add(obj.echoUrl); + notAddedFile.debug(obj.echoUrl); + + } + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new ESciDocXmlObjectException(); + } + + } + } + if (logger.getLevel() == Level.DEBUG) { + for (String addedObject : addedObjects) { + logger.debug(addedObject); + } + } + + return true; + } + + + /** * Deal with existing objects, do nothing if md5 of stored metadata and metadata on the server is the same otherwise call {@link #updateObject(ECHOObject)}. 
@@ -257,21 +285,31 @@ ECHORessource objOldRes = (ECHORessource)old; String md5onServer = objNewRes.getIndexMetaMD5onServer(); String md5=objOldRes.getIndexMetaMD5stored(); + + objNewRes.writeEsciDocIDToIndexMeta(objOldRes.eScidocId); + if (md5onServer.equals(md5)) return; else { + updateObject(objNew, old); + + } - + } private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException { objNew.context = context; + + //erzeuge erst einmal ein neues XML Object aus den neuen Daten. eSciDocXmlObject escidocItem = transformer.transform(objNew); String lastModificationDateOld = objOld.lastModificationDate; + + //jetzt das alte ModeificationDate dort rein (wegen, optimitistic locking) escidocItem.setLastModificationDate(lastModificationDateOld); try { - HttpResponse ret = connector.eScidocPut(objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML())); + HttpResponse ret = connector.eScidocPut("/ir/item/"+objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML())); HttpEntity ent = ret.getEntity(); if (ret.getStatusLine().getStatusCode() != 200) { logger.debug("Can not update:" + objOld.eScidocId); @@ -289,32 +327,33 @@ // TODO Auto-generated catch block e.printStackTrace(); } - + addedFile.debug("updated:"+objOld.eScidocId); } /** * @param command * @param objectXPath + * @param comment der in escidoc bei der Operation abgespeichert wird. 
* @param mode 0 : only submit, 1:only release, 2:release and submit * @throws Exception */ - public void releaseAndSubmitObjects(String command, String objectXPath,int mode) + public void releaseAndSubmitObjects(String command, String objectXPath,String comment,int mode) throws Exception { Integer numberOfHits = connector.getNumberOfHitsFromFilterResult( command, objectXPath,mode); - - + + int tausend = ((numberOfHits-1) / MAX_REC); - + String queryRestrict=""; if(mode==0 | mode==2){ queryRestrict="query=%22/properties/version/status%22=pending"; } else { queryRestrict="query=%22/properties/version/status%22=submitted"; } - + for (int t = 0; t <= tausend; t++) { int start = t * MAX_REC+1; // int max=Math.min((t+1)*1000, numberOfHits); @@ -322,56 +361,56 @@ + String.valueOf(start)+"&"+queryRestrict; for (eSciDocXmlObject obj : connector .getObjectsFromFilterResult(command+query, objectXPath)) { - + //TODO is the following really necessary, currently the obj in the list is sometimes not the current one. 
try{ - HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); - HttpEntity ent = resObj.getEntity(); - if (ent!=null){ - obj= new eSciDocXmlObject(ent.getContent()); - } else { - logger.debug("Can not retrieve:" + obj.getESciDocId()); - continue; - } + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + logger.debug("Can not retrieve:" + obj.getESciDocId()); + continue; + } } catch (Exception e){ logger.debug("Can not retrieve:" + obj.getESciDocId()); continue; } if (mode==0 | mode==2){ HttpResponse res = connector.submitAnObject(obj, - "first release"); + comment); logger.debug(res.getStatusLine()); - + if (res.getStatusLine().getStatusCode() != 200) { logger.debug("Can not submit:" + obj.getESciDocId()); // res.getEntity().consumeContent(); // necessary to release // the conneciton - + } InputStream restream = res.getEntity().getContent(); logger.debug(EScidocBasicHandler.convertStreamToString(restream)); //res.getEntity().consumeContent(); // necessary to release the - // conneciton - + // conneciton + if (!connector.upDateObject(obj)) { logger.debug("Can not update:" + obj.getESciDocId()); // continue; - + } } - + if (mode==1 | mode==2){ - HttpResponse res = connector.releaseAnObject(obj, "first release"); + HttpResponse res = connector.releaseAnObject(obj, comment); logger.debug(res.getStatusLine()); if (res.getStatusLine().getStatusCode() != 200) { logger.debug("Can not release:" + obj.getESciDocId()); res.getEntity().consumeContent(); // necessary to release - // the conneciton + // the conneciton continue; } addedFile.debug("RELEASED:" + obj.getESciDocId()); res.getEntity().consumeContent(); // necessary to release the - // connecito + // connecito } } } @@ -381,24 +420,31 @@ public static void main(String[] args) throws Exception { Logger rl = Logger.getRootLogger(); - DOMConfigurator.configure("log4uconf.xml"); + 
DOMConfigurator.configure("/etc/escidocImportConfig.xml"); rl.setLevel(Level.DEBUG); - + EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); - - + + + //ECHOImporter newimporter = new ECHOImporter(new URL( + // "file:///Users/dwinter/libcoll.rdf")); + ECHOImporter newimporter = new ECHOImporter(new URL( - "file:///Users/dwinter/libcoll.rdf")); + "http://xserve09.mpiwg-berlin.mpg.de:19280/echo_nav/echo_pages/content/showRDF")); ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, new ECHOTransformer(), connector, "/ir/context/escidoc:1001"); // hv.readObjectsFromInstance("ECHO_collection"); - // hv.readObjectsFromInstance("ECHO_resource"); + hv.readObjectsFromInstance("ECHO_resource"); hv.releaseAndSubmitObjects( "/ir/context/escidoc:1001/resources/members", - "//escidocItem:item",0); + "//escidocItem:item","first release",0); + hv.releaseAndSubmitObjects( + "/ir/context/escidoc:1001/resources/members", + "//escidocItem:item","first release",1); + // newimporter.organizeRessourcesInCollections(connector, // "/ir/context/escidoc:1001"); diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java --- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java Mon May 14 09:58:45 2012 +0200 @@ -31,21 +31,38 @@ rl.setLevel(Level.DEBUG); + if (args.length<4){ + System.out.println("Usage: harvest username password path context."); + System.out.println("eg: harvest username password /mpiwg/online/permanent/einstein/annalen /ir/context/escidoc:12001"); + System.exit(0); + + } + + String username = args[0]; + String pw = args[1]; + String path = args[2]; + String context = args[3]; + + //IndexMetaIterator sd = new IndexMetaIterator(new File("/Volumes/online_permanent/echo")); //while (sd.hasNext()){ // System.out.println(sd.next()); //} - 
EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); + EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,username,pw); - ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/")), - new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); + //ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/")), + // new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); + + ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File(path)), + new ECHOTransformer(),connector,context); + //hv.readObjectsFromInstance("ECHO_collection"); hv.readObjectsFromInstance("ECHO_resource"); //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001"); - hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",0); - hv.releaseAndSubmitObjects("/ir/context/escidoc:12001/resources/members","//escidocItem:item",1); + hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item","added esidoc test id",0); + hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item","added esidoc test id",1); } diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java --- a/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java Sat Jan 15 17:05:29 2011 +0100 +++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java Mon May 14 09:58:45 2012 +0200 @@ -38,8 +38,8 @@ public eSciDocXmlObject transform(ECHOObject obj) throws TransformerException { try{ - if (obj.pid==null) - return null; + //if (obj.pid==null) + // return null; diff -r df8c62d84f8f -r a844f6948dd8 src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java --- a/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Sat Jan 15 17:05:29 2011 +0100 +++ 
b/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Mon May 14 09:58:45 2012 +0200 @@ -53,12 +53,14 @@ try { dom = builder.build(template); - xpath = EScidocTools.getESciDocXpath("//prop:pid"); + if (pid!=null){ + xpath = EScidocTools.getESciDocXpath("//prop:pid"); - Element test = (Element) xpath.selectSingleNode(dom); + Element test = (Element) xpath.selectSingleNode(dom); - test.setText(pid); - this.pid=pid; + test.setText(pid); + this.pid=pid; + } } catch (JDOMException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -483,7 +485,22 @@ XPath xp = EScidocTools.getESciDocXpath("./@xlink:href"); Attribute href = (Attribute) xp.selectSingleNode(el); - return href.getValue(); + + + return href.getValue(); + + } + + public String getESciDocObjId() throws JDOMException { + + Element el = dom.getRootElement(); + + XPath xp = EScidocTools.getESciDocXpath("./@objid"); + Attribute href = (Attribute) xp.selectSingleNode(el); + + + return href.getValue(); + } public void setContentModel(String href) throws JDOMException { @@ -512,22 +529,21 @@ try { XPath xp = EScidocTools.getESciDocXpath(".//escidocItem:item/@last-modification-date"); item = (Attribute)xp.selectSingleNode(dom); + if (item==null){ + xp = EScidocTools.getESciDocXpath(".//escidocItem:item"); + Element element = (Element)xp.selectSingleNode(dom); + element.setAttribute("last-modification-date", lastModificationDateOld); + } + else { + item.setValue(lastModificationDateOld); + } - - if (item==null) {//existiert noch nicht} - //Namespace namespace = Namespace.getNamespace("item",EScidocTools.item); - xp = EScidocTools.getESciDocXpath(".//escidocItem:item"); - Element itemElement = (Element)xp.selectSingleNode(dom); - itemElement.setAttribute("last-modification-date", lastModificationDateOld); - - - } else { - item.setValue(lastModificationDateOld); - } } catch (JDOMException e) { e.printStackTrace(); throw new ESciDocXmlObjectException(); } + + }