Mercurial > hg > eSciDocImport
changeset 11:ecd66e70cb65
Tools for adding and changing DRIs in index.meta
author | dwinter |
---|---|
date | Thu, 26 Jul 2012 11:24:03 +0200 |
parents | b6cf6462d709 |
children | 6e55be78bd57 |
files | src/de/mpiwg/itgroup/eSciDoc/Tools/DRI/CheckAndAddDRIs.java src/de/mpiwg/itgroup/eSciDoc/Tools/DRI/ConvertXMLToDRIList.java src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoDate.java |
diffstat | 3 files changed, 360 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
// File: src/de/mpiwg/itgroup/eSciDoc/Tools/DRI/CheckAndAddDRIs.java
// (new file in this changeset, Thu Jul 26 11:24:03 2012 +0200)

package de.mpiwg.itgroup.eSciDoc.Tools.DRI;

import java.awt.datatransfer.StringSelection;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.HashMap;

import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.utils.EScidocNameSpaceContext;

/**
 * Loads a tab-delimited list of directory names and the DRIs belonging to them
 * and compares each DRI with the one stored in the directory's
 * <code>index.meta</code>.
 *
 * Depending on the selected mode a missing DRI is only reported or added, and
 * a wrong DRI is only reported or corrected. Before an <code>index.meta</code>
 * is rewritten, the previous version is renamed to
 * <code>index.meta_old_DRI</code>.
 *
 * @author dwinter
 */
public class CheckAndAddDRIs {

    /** Result of {@link #checkDRI}: the stored DRI equals the expected one. */
    private static final int DRI_OK = 0;
    /** Result of {@link #checkDRI}: no DRI of the requested type is stored. */
    private static final int DRI_MISSING = 1;
    /** Result of {@link #checkDRI}: a different DRI is stored. */
    private static final int DRI_WRONG = 2;

    /**
     * The parsed <code>index.meta</code> together with the DRI element found
     * in it (if any).
     */
    public class DRI {

        DRI(Document doc, String driString, Element driElement, boolean hasNameSpace) {
            this.doc = doc;
            this.driString = driString;
            this.driElement = driElement;
            this.hasNameSpace = hasNameSpace;
        }

        /** The parsed index.meta document. */
        Document doc;
        /** Trimmed text of the DRI element, or null if there is none. */
        String driString;
        /** The DRI element itself, or null if there is none. */
        Element driElement;
        /** True if the meta/dri elements have to be addressed with the mpiwg prefix. */
        boolean hasNameSpace;
    }

    /** Maps the directory name (first column) to the expected DRI (second column). */
    HashMap<String, String> fileToDri;
    /** Value of the <code>type</code> attribute of the dri elements handled. */
    String driType;

    /**
     * Reads the tab-delimited list of directories and DRIs.
     *
     * Blank lines are ignored; lines with fewer than two tab-separated columns
     * are reported on stderr and skipped instead of aborting the whole run.
     *
     * @param fileName path of the tab-delimited list (directory TAB dri)
     * @param driType  value of the type attribute of the dri elements to handle
     * @throws IOException if the list cannot be opened or read
     */
    public CheckAndAddDRIs(String fileName, String driType) throws IOException {
        this.driType = driType;
        this.fileToDri = new HashMap<String, String>();

        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName)));
        try {
            String strLine;
            int lineNo = 0;
            // read all files and DRIs
            while ((strLine = br.readLine()) != null) {
                lineNo++;
                if (strLine.trim().length() == 0) {
                    continue;
                }
                String[] splitted = strLine.split("\t");
                if (splitted.length < 2) {
                    System.err.println("skipping malformed line " + lineNo + " in "
                            + fileName + ":" + strLine);
                    continue;
                }
                fileToDri.put(splitted[0], splitted[1]);
            }
        } finally {
            // close the stream even when reading fails
            br.close();
        }
    }

    /**
     * Checks for every listed directory whether its DRI is in the index.meta
     * and reports the result as a tab-delimited line on stdout.
     *
     * @param mode "check" only reports; any other mode adds missing DRIs;
     *             "modify" additionally overwrites wrong DRIs
     */
    private void doDRI(String mode) {
        for (String key : fileToDri.keySet()) {
            String expected = fileToDri.get(key);
            DRI dri;
            int result;
            try {
                dri = getDRI(key);
                result = checkDRI(dri, expected);
            } catch (JDOMException e) {
                System.err.println("jdomExecption:" + key);
                continue;
            } catch (IOException e) {
                System.err.println("IOError:" + key);
                e.printStackTrace();
                continue;
            }

            if (result == DRI_OK) {
                System.out.println(String.format("%s\t%s\tOK", key, expected));
            } else if (result == DRI_MISSING) {
                if (mode.equals("check")) {
                    System.out.println(String.format("%s\t%s\tno DRI", key, expected));
                } else {
                    try {
                        System.out.println(String.format("%s\t%s\tadding DRI", key, expected));
                        add(key, dri, expected);
                    } catch (JDOMException e) {
                        System.err.println("jdomExecption:" + key);
                    } catch (IOException e) {
                        System.err.println("IOExecption:" + key);
                    }
                }
            } else if (result == DRI_WRONG) {
                if (mode.equals("modify")) {
                    try {
                        // report the change like the add branch does, so that no
                        // file is rewritten without a trace in the output
                        System.out.println(String.format("%s\t%s\tmodifying DRI", key, expected));
                        modify(key, dri, expected);
                    } catch (IOException e) {
                        System.err.println("IOExecption:" + key);
                    }
                } else {
                    System.out.println(String.format("%s\t%s\twrong DRI", key, expected));
                }
            }
        }
    }

    /**
     * Replaces the text of the existing DRI element and rewrites the index.meta.
     *
     * @param fileName  directory containing the index.meta
     * @param dri       document and DRI element as returned by {@link #getDRI}
     * @param driString the DRI to store
     * @throws IOException if the index.meta cannot be written
     */
    private void modify(String fileName, DRI dri, String driString) throws IOException {
        dri.driElement.setText(driString);
        writeIndexMeta(fileName, dri);
    }

    /**
     * Appends a new dri element to the meta element and rewrites the index.meta.
     * If the document has no meta element this is reported on stderr and
     * nothing is written.
     *
     * @param fileName  directory containing the index.meta
     * @param dri       document as returned by {@link #getDRI}
     * @param driString the DRI to store
     * @throws JDOMException if the XPath cannot be evaluated
     * @throws IOException   if the index.meta cannot be written
     */
    private void add(String fileName, DRI dri, String driString) throws JDOMException, IOException {
        XPath xp;
        if (dri.hasNameSpace) {
            xp = EScidocTools.getESciDocXpath("//mpiwg:meta");
        } else {
            xp = EScidocTools.getESciDocXpath("//meta");
        }
        Element meta = (Element) xp.selectSingleNode(dri.doc);
        if (meta == null) {
            System.err.println("NO metatag in:" + fileName);
            return;
        }

        Element el;
        if (dri.hasNameSpace) {
            el = new Element("dri", EScidocNameSpaceContext.MPIWG);
        } else {
            el = new Element("dri");
        }

        el.setAttribute("type", this.driType);
        el.addContent(driString);
        meta.addContent(el);

        writeIndexMeta(fileName, dri);
    }

    /**
     * Writes the document back to <code>fileName/index.meta</code> (UTF-8)
     * after renaming the existing file to <code>index.meta_old_DRI</code>.
     * If the rename fails this is reported on stderr and nothing is written.
     *
     * @param fileName directory containing the index.meta
     * @param dri      holder of the document to write
     * @throws IOException if the new index.meta cannot be written
     */
    private void writeIndexMeta(String fileName, DRI dri) throws IOException {
        File im = new File(fileName + "/index.meta");
        File old = new File(fileName + "/index.meta_old_DRI");

        // serialize first: a failure here must not leave the directory
        // without an index.meta
        String xml = new XMLOutputter().outputString(dri.doc);

        if (!im.renameTo(old)) {
            System.err.println("cannot rename:" + fileName);
            return;
        }

        Writer out = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(im), "UTF-8"));
        try {
            out.write(xml);
        } finally {
            // close (and flush) even when the write fails
            out.close();
        }
    }

    /**
     * Compares the DRI found in the index.meta with the expected one.
     *
     * @param dri       result of {@link #getDRI}
     * @param driString the expected DRI
     * @return 0 if both are equal, 1 if the index.meta has no DRI, 2 if it has
     *         a different one
     */
    private int checkDRI(DRI dri, String driString) {
        if (dri.driString == null) {
            return DRI_MISSING;
        }
        if (dri.driString.equals(driString)) {
            return DRI_OK;
        }
        return DRI_WRONG;
    }

    /**
     * Reads the index.meta of the given directory and looks up the dri element
     * of the configured type, first without and then with the mpiwg prefix.
     *
     * @param filename directory containing the index.meta
     * @return the document together with the DRI element; element and text are
     *         null if no such element exists
     * @throws JDOMException if the index.meta cannot be parsed or queried
     * @throws IOException   if the index.meta cannot be read
     */
    private DRI getDRI(String filename) throws JDOMException, IOException {
        Document doc = readIndexMeta(filename);

        XPath xpath = EScidocTools.getESciDocXpath("//meta/dri[@type=\"" + driType + "\"]");
        Element res = (Element) xpath.selectSingleNode(doc);

        boolean hasNameSpace = false;
        if (res == null) {
            // try with namespace
            xpath = EScidocTools.getESciDocXpath("//mpiwg:meta/mpiwg:dri[@type=\"" + driType + "\"]");
            res = (Element) xpath.selectSingleNode(doc);

            if (res == null) {
                // No DRI at all. add() still has to know which kind of meta
                // element to look for, so probe the document: it counts as
                // namespaced only if there is no plain meta element but a
                // prefixed one.
                boolean plainMeta =
                        EScidocTools.getESciDocXpath("//meta").selectSingleNode(doc) != null;
                boolean nsMeta = !plainMeta
                        && EScidocTools.getESciDocXpath("//mpiwg:meta").selectSingleNode(doc) != null;
                return new DRI(doc, null, null, nsMeta);
            }
            hasNameSpace = true;
        }

        String txt = res.getTextTrim();
        return new DRI(doc, txt, res, hasNameSpace);
    }

    /**
     * Parses <code>filename/index.meta</code>.
     *
     * @param filename directory containing the index.meta
     * @return the parsed document
     * @throws JDOMException if the file is not well-formed
     * @throws IOException   if the file cannot be read
     */
    private Document readIndexMeta(String filename) throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder();
        return builder.build(new File(filename + "/index.meta"));
    }

    /**
     * Command line entry point.
     *
     * @param args command (check, add or modify), file name of the
     *             tab-delimited list, dri type
     * @throws IOException if the list cannot be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 3) {
            // usage errors go to stderr and yield a non-zero exit code
            System.err.println("USAGE: command filename dri-type");
            System.exit(1);
        }

        String command = args[0];
        String fileName = args[1];
        String driType = args[2];

        final CheckAndAddDRIs ck = new CheckAndAddDRIs(fileName, driType);

        if (command.equals("check")) {
            System.out.println("only checking");
            ck.doDRI("check");
        } else if (command.equals("add")) {
            System.out.println("only adding");
            ck.doDRI("add");
        } else if (command.equals("modify")) {
            System.out.println("adding and modify");
            ck.doDRI("modify");
        } else {
            System.err.println("Only the commands: check, add or modify are allowed");
            System.exit(1);
        }
    }
}
// File: src/de/mpiwg/itgroup/eSciDoc/Tools/DRI/ConvertXMLToDRIList.java
// (new file in this changeset, Thu Jul 26 11:24:03 2012 +0200)

package de.mpiwg.itgroup.eSciDoc.Tools.DRI;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;

import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.utils.EScidocNameSpaceContext;

/**
 * Converts an XML file containing eSciDoc items into a tab-delimited list.
 *
 * For every <code>escidocItem:item</code> one line is written that consists of
 * the trimmed text of its <code>mpiwg:archivePath</code> descendant and the
 * last path segment of its <code>xlink:href</code> attribute.
 */
public class ConvertXMLToDRIList {

    /** XML file to read. */
    private File inFile;
    /** Tab-delimited list to write. */
    private File outFile;

    /**
     * @param inFile  path of the XML file to read
     * @param outFile path of the tab-delimited list to write
     */
    public ConvertXMLToDRIList(String inFile, String outFile) {
        this.inFile = new File(inFile);
        this.outFile = new File(outFile);
    }

    /**
     * Reads the input file and writes one "path TAB id" line per item.
     * Items without an archive path or without a usable xlink:href are
     * reported on stderr and skipped.
     *
     * @throws JDOMException if the input cannot be parsed or queried
     * @throws IOException   if the input cannot be read or the output cannot
     *                       be written
     */
    public void convert() throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder();
        Document doc = builder.build(inFile);

        // built once, they do not depend on the individual item
        XPath itemPath = EScidocTools.getESciDocXpath("//escidocItem:item");
        XPath archivePath = EScidocTools.getESciDocXpath(".//mpiwg:archivePath");
        Namespace xlNs = Namespace.getNamespace(EScidocNameSpaceContext.xlink);

        // selectNodes returns a raw List; the XPath selects elements only
        @SuppressWarnings("unchecked")
        List<Element> items = itemPath.selectNodes(doc);

        FileWriter out = new FileWriter(outFile);
        try {
            for (Element item : items) {
                Element ap = (Element) archivePath.selectSingleNode(item);
                Attribute href = item.getAttribute("href", xlNs);
                if (ap == null || href == null) {
                    System.err.println("skipping item without archivePath or xlink:href");
                    continue;
                }

                String[] splitted = href.getValue().split("/");
                if (splitted.length == 0) {
                    // happens for values that consist of slashes only
                    System.err.println("skipping item with unusable xlink:href:" + href.getValue());
                    continue;
                }
                String path = ap.getTextTrim();
                String id = splitted[splitted.length - 1];

                out.write(String.format("%s\t%s\n", path, id));
            }
        } finally {
            // close (and flush) even when an item cannot be processed
            out.close();
        }
    }

    /**
     * Command line entry point.
     *
     * @param args input XML file, output list file
     * @throws IOException   if reading or writing fails
     * @throws JDOMException if the input cannot be parsed
     */
    public static void main(String[] args) throws IOException, JDOMException {
        if (args.length < 2) {
            // usage errors go to stderr and yield a non-zero exit code
            System.err.println("USAGE: fileIN filenOUT");
            System.exit(1);
        }
        ConvertXMLToDRIList cv = new ConvertXMLToDRIList(args[0], args[1]);
        cv.convert();
    }
}
--- a/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoDate.java Thu Jun 21 09:46:26 2012 +0200 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/RepairPublishingInfoDate.java Thu Jul 26 11:24:03 2012 +0200 @@ -31,7 +31,7 @@ IOException, JDOMException, ESciDocXmlObjectException { Logger logger = Logger.getRootLogger(); - logger.setLevel(Level.DEBUG); + logger.setLevel(Level.INFO); EScidocBasicHandler connector = new EScidocBasicHandler( "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl1rian"); @@ -45,7 +45,8 @@ String query = "?maximumRecords=" + String.valueOf(MAX_REC) + "&startRecord=" + String.valueOf(start); - String command = "/ir/context/escidoc:38279/resources/members"; + //String command = "/ir/context/escidoc:38279/resources/members"; + String command = "/ir/context/escidoc:79281/resources/members"; for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( command + query, objectXPath)) {