Mercurial > hg > eSciDocImport
changeset 10:ad5c0748bd08
minor
author | dwinter |
---|---|
date | Mon, 20 Aug 2012 12:53:45 +0200 |
parents | b6cf6462d709 |
children | 6e55be78bd57 |
files | src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceAffiliation_local.java src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java |
diffstat | 4 files changed, 153 insertions(+), 44 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/ReplaceAffiliation_local.java Mon Aug 20 12:53:45 2012 +0200 @@ -0,0 +1,101 @@ +package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; + +import java.io.IOException; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class ReplaceAffiliation_local { + + /** + * @param args + * @throws ESciDocXmlObjectException + * @throws JDOMException + * @throws IOException + * @throws IllegalStateException + */ + public static void main(String[] args) throws IllegalStateException, + IOException, JDOMException, ESciDocXmlObjectException { + + Logger logger = Logger.getRootLogger(); + logger.setLevel(Level.DEBUG); + EScidocBasicHandler connector = new EScidocBasicHandler( + "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian"); + + if(args.length<2){ + System.out.println("Usage: startrecord maximumrecords"); + System.exit(-1); + } + String MAX_REC = args[1]; + String start = args[0]; + String objectXPath = "//escidocItem:item"; + + String query = "?maximumRecords=" + String.valueOf(MAX_REC) + + "&startRecord=" + String.valueOf(start); + String command = "/ir/context/escidoc:38279/resources/members"; + for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( + command + query, objectXPath)) { + + Document doc = obj.getDocument(); + Boolean changed=false; + XPath personXPath = EScidocTools.getESciDocXpath("//eterms:creator/person:person"); + XPath organizationXPath = EScidocTools.getESciDocXpath("./organization:organization"); + XPath titleXPath = EScidocTools.getESciDocXpath("./dc:title"); + XPath identifierXPath = EScidocTools.getESciDocXpath("./dc:identifier"); + + @SuppressWarnings("unchecked") + List<Element> persons = personXPath.selectNodes(doc); + for (Element person : persons) { + Element organization = (Element) organizationXPath.selectSingleNode(person); + if (organization!=null){ + Element title = (Element) titleXPath.selectSingleNode(organization); + + String titleString = title.getTextTrim(); + if (titleString + .startsWith("Max Planck Society")) { + title.setText("Max Planck Institute for the History of Science"); + Element identifier = (Element) identifierXPath.selectSingleNode(organization); + identifier.setText("escidoc:14002"); + changed=true; + + } + } + } + + //System.out.println(obj.printXML()); + if (changed){ + + Boolean retVal = connector.updateItem(obj); + System.out.println("Replaced:"+obj.getESciDocId()); + HttpResponse retValu = connector.submitAnObject(obj, "changed affiliation of persons"); + + System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); + HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); + HttpEntity ent = resObj.getEntity(); + if (ent!=null){ + obj= new eSciDocXmlObject(ent.getContent()); + } else { + System.out.println("Can not retrieve:" + obj.getESciDocId()); + continue; + } + + HttpResponse reValue2 = connector.releaseAnObject(obj, "changed affiliation of persons"); + System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent())); + + } + } + + } +}
--- a/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java Thu Jun 21 09:46:26 2012 +0200 +++ b/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java Mon Aug 20 12:53:45 2012 +0200 @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.Enumeration; import java.util.Iterator; +import java.util.List; import java.util.Stack; import java.util.Vector; @@ -51,14 +52,14 @@ private ArrayList<String>filter; //Array of paths which shouldn'T be indexed public IndexMetaIterator(File rootFolder) throws IOException{ - + filter = new ArrayList<String>(); filter.add("/mpiwg/online/permanent/SudanRockArt"); // TODO: make this configurable - + this.rootFolder=rootFolder; this.currentFolder=rootFolder; this.stack = new Stack<String>(); - + for (String f:rootFolder.list()){ String fn = rootFolder.getCanonicalPath()+"/"+f; if (!filter.contains(fn)){ @@ -82,39 +83,42 @@ String nextFile = stack.pop(); while(!nextFile.endsWith(".meta") && !stack.isEmpty()){ System.out.println("CHECK_________"+nextFile); - - + + if(!nextFile.endsWith("pageimg") & !nextFile.endsWith("pages") & !nextFile.endsWith("pagesHi") & !nextFile.endsWith("pagesLo") & !nextFile.endsWith("pageimg")){ //skip pageimg - + File nf = new File(nextFile); - + if(nf.isDirectory()){ - for (String f:nf.list()){ - String fn; - try { - if (!f.startsWith(".")){ - fn = nf.getCanonicalPath()+"/"+f; - if (!filter.contains(fn)){ - if (!f.equals("")) {// FIXME some filesystems (sshfs?) gives empty filenames if the path contains special characters. - stack.push(fn);} - else { - Logger.getLogger("notAddedFilesLogger").info("Folder -" +fn+" contains files with characters I cannot read!" ); + String[] filesInDirectory=nf.list(); + if (filesInDirectory != null){ + for (String f:filesInDirectory){ + String fn; + try { + if (!f.startsWith(".")){ + fn = nf.getCanonicalPath()+"/"+f; + if (!filter.contains(fn)){ + if (!f.equals("")) {// FIXME some filesystems (sshfs?) gives empty filenames if the path contains special characters. + stack.push(fn);} + else { + Logger.getLogger("notAddedFilesLogger").info("Folder -" +fn+" contains files with characters I cannot read!" ); + } + } - - } + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); + } - } } } - + nextFile = stack.pop(); - + } if (!nextFile.endsWith(".meta")) //der letzte Eintrag muss noch gretrennt getestet werden. nextFile = null; @@ -136,15 +140,15 @@ } private ECHOObject createECHOObject(File nextFile) throws Exception { - + //Document doc = new SAXBuilder().build(nextFile); try{ - FoxridgeRessource er = new FoxridgeRessource(nextFile.getParentFile().getName(),nextFile.getParentFile().getAbsolutePath(),null); - - er.metaData = er.correctML(nextFile.getAbsolutePath()); - //er.pid=er.getPid(); //TODO: not needed any more? - er.echoUrl=er.metaData; //TODO find a better solution, what to present here, z.b. texttool-tag auswerten. - return er; + FoxridgeRessource er = new FoxridgeRessource(nextFile.getParentFile().getName(),nextFile.getParentFile().getAbsolutePath(),null); + + er.metaData = er.correctML(nextFile.getAbsolutePath()); + //er.pid=er.getPid(); //TODO: not needed any more? + er.echoUrl=er.metaData; //TODO find a better solution, what to present here, z.b. texttool-tag auswerten. + return er; } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -154,12 +158,12 @@ @Override public void remove() { // TODO Auto-generated method stub - + } - + } - +
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Thu Jun 21 09:46:26 2012 +0200 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Mon Aug 20 12:53:45 2012 +0200 @@ -240,9 +240,10 @@ // TODO write PID to back to echo-obj Boolean result = connector.createItem(escidocItem); if (result) { - addedObjects.add(escidocItem.getESciDocId()); - addedFile.debug(escidocItem.getESciDocId() + "\n"); - + String objID=escidocItem.getESciDocId(); + addedObjects.add(objID); + addedFile.debug(objID + "\n"); + ((ECHORessource)obj).writeEsciDocIDToIndexMeta(objID.replace("/ir/item/","")); } else { notAddedObjects.add(obj.echoUrl); notAddedFile.debug(obj.echoUrl); @@ -344,7 +345,7 @@ Integer numberOfHits = connector.getNumberOfHitsFromFilterResult( command, objectXPath,mode); - + logger.info(String.format("%s objects found.", numberOfHits)); int tausend = ((numberOfHits-1) / MAX_REC); String queryRestrict=""; @@ -353,7 +354,7 @@ } else { queryRestrict="query=%22/properties/version/status%22=submitted"; } - + int counter=0; for (int t = 0; t <= tausend; t++) { int start = t * MAX_REC+1; // int max=Math.min((t+1)*1000, numberOfHits); @@ -362,6 +363,9 @@ for (eSciDocXmlObject obj : connector .getObjectsFromFilterResult(command+query, objectXPath)) { + logger.info(String.format("%s/%s", counter,numberOfHits)); + counter+=1; + //TODO is the following really necessary, currently the obj in the list is sometimes not the current one. try{ HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); @@ -436,7 +440,7 @@ new ECHOTransformer(), connector, "/ir/context/escidoc:1001"); // hv.readObjectsFromInstance("ECHO_collection"); - hv.readObjectsFromInstance("ECHO_resource"); + //hv.readObjectsFromInstance("ECHO_resource"); hv.releaseAndSubmitObjects( "/ir/context/escidoc:1001/resources/members",
--- a/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java Thu Jun 21 09:46:26 2012 +0200 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarvester.java Mon Aug 20 12:53:45 2012 +0200 @@ -29,7 +29,7 @@ Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG); Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG); - rl.setLevel(Level.DEBUG); + rl.setLevel(Level.INFO); if (args.length<4){ System.out.println("Usage: harvest username password path context."); @@ -58,10 +58,10 @@ //hv.readObjectsFromInstance("ECHO_collection"); - hv.readObjectsFromInstance("ECHO_resource"); + //hv.readObjectsFromInstance("ECHO_resource"); //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001"); - hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item","added esidoc test id",0); + //hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item","added esidoc test id",0); hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item","added esidoc test id",1);