Mercurial > hg > eSciDocImport
view src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java @ 2:fab8e78184fa
minor
author | dwinter |
---|---|
date | Mon, 10 Jan 2011 12:42:27 +0100 |
parents | c6929e63b0b8 |
children | 58b52df9763c |
line wrap: on
line source
package de.mpiwg.itgroup.eSciDoc.utils;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringBufferInputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.w3c.dom.Attr;
import org.xml.sax.SAXException;
//import com.sun.org.apache.xml.internal.serialize.OutputFormat;
//import com.sun.org.apache.xml.internal.serialize.XMLSerializer;

import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.DOMBuilder;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.Tools.MyFileNameFilter;

/**
 * Holds a JDOM {@link Document} and offers helper methods that read or fill in
 * values at fixed XPath locations of that document (pid, Dublin Core entries,
 * components, RELS-EXT relationship, ...).
 *
 * <p>All XPath expressions are compiled through
 * {@link EScidocTools#getESciDocXpath(String)}. Lookups that are required for a
 * method to do its work raise a {@link JDOMException} naming the XPath when the
 * node is missing, instead of failing later with a {@code NullPointerException}.
 */
public class eSciDocXmlObject {

    /** Classpath location of the XML template used for every added component. */
    private static final String COMPONENT_TEMPLATE =
            "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml";

    /** XPath of the element that carries the object's pid. */
    private static final String PID_XPATH = "//prop:pid";

    private Document dom;
    private String pid;

    /**
     * Parses {@code template} and writes {@code pid} into its {@code //prop:pid} element.
     *
     * @param pid      identifier to store in the document and in this object
     * @param template XML to parse; the stream is not closed by this constructor
     * @throws Exception if parsing fails or the template has no {@code //prop:pid} element
     */
    public eSciDocXmlObject(String pid, InputStream template) throws Exception {
        SAXBuilder builder = new SAXBuilder();
        dom = builder.build(template);
        requireElement(dom, PID_XPATH).setText(pid);
        this.pid = pid;
    }

    /**
     * Parses the XML found at {@code url} and reads the pid from {@code //prop:pid}.
     *
     * @param url location of the XML document
     * @throws Exception if parsing fails or the document has no {@code //prop:pid} element
     */
    public eSciDocXmlObject(URL url) throws Exception {
        SAXBuilder builder = new SAXBuilder();
        dom = builder.build(url);
        this.pid = requireElement(dom, PID_XPATH).getTextTrim();
    }

    /**
     * Wraps a clone of {@code el} as the root of a new document. Unlike the other
     * constructors, a missing {@code //prop:pid} element is tolerated; the pid then
     * stays {@code null}.
     *
     * @param el element to clone; the original is left untouched
     * @throws JDOMException if the XPath lookup fails
     */
    public eSciDocXmlObject(Element el) throws JDOMException {
        dom = new Document((Element) el.clone());
        Element pidElement =
                (Element) EScidocTools.getESciDocXpath(PID_XPATH).selectSingleNode(dom);
        if (pidElement != null) {
            this.pid = pidElement.getTextTrim();
        }
    }

    /**
     * Parses {@code content} and reads the pid from {@code //prop:pid}.
     *
     * @param content XML to parse; the stream is not closed by this constructor
     * @throws JDOMException if parsing fails or the document has no {@code //prop:pid} element
     * @throws IOException   if reading the stream fails
     */
    public eSciDocXmlObject(InputStream content) throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder();
        dom = builder.build(content);
        this.pid = requireElement(dom, PID_XPATH).getTextTrim();
    }

    /**
     * Copies name and text of every child element of {@code mdDc} into new
     * {@link EScidocTools#DC} elements appended to the {@code //dc-md} container.
     *
     * @param mdDc element whose direct children are copied
     * @throws Exception if the document has no {@code //dc-md} element
     */
    public void insertDC(Element mdDc) throws Exception {
        // TODO check path to dc in escidoc templates
        Element dcContainer = requireElement(dom, "//dc-md");
        for (Object child : mdDc.getChildren()) {
            if (child instanceof Element) {
                Element source = (Element) child;
                Element copy = new Element(source.getName(), EScidocTools.DC);
                copy.setText(source.getText());
                dcContainer.addContent(copy);
            }
        }
    }

    /**
     * Appends one {@link EScidocTools#DC} element per map entry (key = element name,
     * value = text) to the {@code //dc-md} container.
     *
     * @param dc element names mapped to their text content
     * @throws Exception if the document has no {@code //dc-md} element
     */
    public void insertDC(HashMap<String, String> dc) throws Exception {
        // TODO check path to dc in escidoc templates
        Element dcContainer = requireElement(dom, "//dc-md");
        for (String name : dc.keySet()) {
            Element newChild = new Element(name, EScidocTools.DC);
            newChild.setText(dc.get(name));
            dcContainer.addContent(newChild);
        }
    }

    /**
     * Appends a clone of {@code meta} to the md-record named {@code mpiwg-index_meta}.
     * Does nothing when {@code meta} is {@code null}.
     *
     * @param meta element to clone into the record, may be {@code null}
     * @throws Exception if the md-record is missing
     */
    public void insertMeta(Element meta) throws Exception {
        if (meta == null) {
            return;
        }
        Element record = requireElement(dom,
                "//metadata-records:md-record[@name='mpiwg-index_meta']");
        record.addContent((Element) meta.clone());
    }

    /**
     * Appends a {@code title} element with the given text to
     * {@code //foxml:xmlContent/oai_dc:dc}.
     *
     * @param title text of the new title element
     * @throws Exception if the container element is missing
     */
    public void setTitle(String title) throws Exception {
        Element dcContainer = requireElement(dom, "//foxml:xmlContent/oai_dc:dc");
        Element newChild = new Element("title", EScidocTools.DC);
        newChild.setText(title);
        dcContainer.addContent(newChild);
    }

    /**
     * Sets the {@code path} attribute of {@code //mpiwg:bundle/mpiwg:bundle-root}.
     *
     * @param folderpath new attribute value
     * @throws Exception if the attribute does not exist in the document
     */
    public void setReferenceFolder(String folderpath) throws Exception {
        // The XPath ends in "/@path", so it selects an Attribute; treating the
        // result as an Element (as before) always failed with ClassCastException.
        Attribute path = requireAttribute(dom, "//mpiwg:bundle/mpiwg:bundle-root/@path");
        path.setValue(folderpath);
    }

    /**
     * Sets the text of {@code //vlp:admin/vlp:identifier}.
     *
     * @param id new identifier text
     * @throws Exception if the element is missing
     */
    public void setVLPId(String id) throws Exception {
        requireElement(dom, "//vlp:admin/vlp:identifier").setText(id);
    }

    /**
     * Adds one {@code page} element per image file to the {@code //vlp:index} element.
     *
     * <p>The images are taken from the child of {@code file} named {@code pageimg},
     * or, if there is none, from the child named {@code pages}. When neither exists
     * (or {@code file} cannot be listed) the method returns without changes. Files
     * are selected with {@link MyFileNameFilter} and processed in sorted pathname
     * order, because {@link File#listFiles()} guarantees no order.
     *
     * @param file path of the document's base folder
     * @throws Exception if the document has no {@code //vlp:index} element
     */
    public void generateXMLIndex(File file) throws Exception {
        File[] entries = file.listFiles();
        if (entries == null) {
            // not a directory, or an I/O error while listing
            return;
        }

        // "pageimg" wins over "pages" regardless of listing order
        File imageFolder = null;
        for (File entry : entries) {
            String entryName = entry.getName();
            if (entryName.equals("pageimg")) {
                imageFolder = entry;
                break;
            }
            if (entryName.equals("pages")) {
                imageFolder = entry;
            }
        }
        if (imageFolder == null) {
            return;
        }

        FilenameFilter filter = new MyFileNameFilter();
        File[] images = imageFolder.listFiles(filter);
        if (images == null) {
            return;
        }
        Arrays.sort(images);

        ArrayList<ArrayList<String>> tupels = new ArrayList<ArrayList<String>>();
        for (File image : images) {
            ArrayList<String> tuple = new ArrayList<String>();
            tuple.add(calculateName(image));
            tuple.add(image.getName());
            tupels.add(tuple);
        }
        generateXMLIndex(tupels);
    }

    /**
     * Appends a {@code page} element for every tuple to {@code //vlp:index}:
     * entry 0 becomes the element text, entry 1 the {@code nr} attribute.
     */
    private void generateXMLIndex(ArrayList<ArrayList<String>> tupels) throws Exception {
        Element index = requireElement(dom, "//vlp:index");
        for (ArrayList<String> tuple : tupels) {
            Element page = new Element("page", EScidocTools.VLP);
            page.setText(tuple.get(0));
            page.setAttribute("nr", tuple.get(1));
            index.addContent(page);
        }
    }

    /**
     * Builds the page label from a file name: the part before the first dot is
     * split into its first character and the rest, joined by {@code ": "}
     * (e.g. {@code p0001.jpg} gives {@code p: 0001}). A name with an empty part
     * before the first dot is returned unchanged.
     */
    private String calculateName(File f) {
        String name = f.getName();
        String[] parts = name.split("[.]");
        String base = parts.length > 0 ? parts[0] : "";
        if (base.length() == 0) {
            return name;
        }
        return base.charAt(0) + ": " + base.substring(1);
    }

    /**
     * Adds an {@code rdf:Description} about {@code info:fedora/<pid>} with an
     * {@code isMemberOfCollection} child pointing at {@code collection}. An
     * {@code rdf:RDF} element is created inside the RELS-EXT {@code xmlContent}
     * when the document does not contain one yet.
     *
     * @param collection value of the {@code rdf:resource} attribute
     * @throws Exception if no {@code rdf:RDF} exists and the RELS-EXT
     *                   {@code xmlContent} element is missing as well
     */
    public void setRelationship(String collection) throws Exception {
        Element rdf = (Element) EScidocTools.getESciDocXpath("//rdf:RDF").selectSingleNode(dom);
        if (rdf == null) {
            Element xmlContent = requireElement(dom,
                    "//foxml:datastream[@ID='RELS-EXT']/foxml:datastreamVersion/foxml:xmlContent");
            rdf = new Element("RDF", EScidocTools.RDF);
            xmlContent.addContent(rdf);
        }
        // NOTE(review): these are plain, un-namespaced attributes named "rdf" and
        // "fedora", not xmlns declarations - kept as-is; confirm they are wanted.
        rdf.setAttribute("rdf", EScidocTools.RDF);
        rdf.setAttribute("fedora", EScidocTools.FEDORA);

        Namespace rdfNs = Namespace.getNamespace("rdf", EScidocTools.RDF);

        Element rdfDescription = new Element("Description", EScidocTools.RDF);
        rdfDescription.setAttribute("about", "info:fedora/" + pid, rdfNs);

        Element rdfMember = new Element("isMemberOfCollection", EScidocTools.FEDORA);
        // rdf:resource belongs on the property element; it used to be set on the
        // Description, leaving isMemberOfCollection without a target.
        rdfMember.setAttribute("resource", collection, rdfNs);

        rdfDescription.addContent(rdfMember);
        rdf.addContent(rdfDescription);
    }

    /**
     * Sets {@code xlink:href} on the component content titled {@code web_page}.
     *
     * @param url new href value
     * @throws Exception if that content element is missing
     */
    public void addWebUrl(String url) throws Exception {
        Element content = requireElement(dom,
                "//escidocComponents:content[@xlink:title='web_page']");
        content.setAttribute("href", url, xlinkNamespace());
    }

    /**
     * Sets {@code xlink:href} on {@code //mpiwg:admin/mpiwg:orig-ref}.
     *
     * @param url new href value
     * @throws Exception if the element is missing
     */
    public void addOrigUrlToMPIWGMetaData(String url) throws Exception {
        Element origRef = requireElement(dom, "//mpiwg:admin/mpiwg:orig-ref");
        origRef.setAttribute("href", url, xlinkNamespace());
    }

    /**
     * Adds an {@code index_meta} component ({@code text/xml}, {@code internal-managed})
     * pointing at {@code url}. A {@code null} or empty url is ignored, matching
     * {@link #addDisplayUrl(String)}.
     *
     * @param url href of the component, may be {@code null} or empty
     * @throws Exception if the template cannot be loaded or required nodes are missing
     */
    public void addIndexMetaUrl(String url) throws Exception {
        if (url == null || url.equals("")) {
            return;
        }
        InputStream componentXML = openComponentTemplate();
        try {
            addComponent(url, componentXML, "index_meta", "text/xml", "internal-managed");
        } finally {
            closeQuietly(componentXML);
        }
    }

    /**
     * Serializes the current document with a default {@link XMLOutputter}.
     *
     * @return the document as an XML string
     * @throws IOException declared for compatibility with existing callers
     */
    public String printXML() throws IOException {
        XMLOutputter out = new XMLOutputter();
        return out.outputString(dom);
    }

    /**
     * Adds a {@code text} component for {@code url} using the bundled component template.
     *
     * @param url href of the component
     * @throws Exception if the template cannot be loaded or required nodes are missing
     */
    public void addFullText(String url) throws Exception {
        InputStream componentXML = openComponentTemplate();
        try {
            addFullText(url, componentXML);
        } finally {
            closeQuietly(componentXML);
        }
    }

    /**
     * Adds a {@code text} component ({@code text/xml}, {@code internal-managed})
     * for {@code url}, built from the given template.
     *
     * @param url         href of the component
     * @param inputStream component template XML; not closed by this method
     * @throws Exception if parsing fails or required nodes are missing
     */
    public void addFullText(String url, InputStream inputStream) throws Exception {
        addComponent(url, inputStream, "text", "text/xml", "internal-managed");
    }

    /**
     * Parses a component template, fills in content category, mime type,
     * {@code xlink:href} and {@code storage}, and appends a clone of its root
     * element to {@code //escidocComponents:components} of this document.
     *
     * @param url         value for the content's {@code xlink:href}
     * @param inputStream component template XML; not closed by this method
     * @param category    text for {@code prop:content-category}
     * @param mimeType    text for {@code prop:mime-type}
     * @param storageType value for the content's {@code storage} attribute
     * @throws JDOMException if parsing fails or a required node is missing
     * @throws IOException   if reading the template fails
     */
    public void addComponent(String url, InputStream inputStream, String category,
            String mimeType, String storageType) throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder();
        Document component = builder.build(inputStream);

        requireElement(component, "//escidocComponents:properties/prop:content-category")
                .setText(category);
        requireElement(component, "//escidocComponents:properties/prop:mime-type")
                .setText(mimeType);

        Element content = requireElement(component, "//escidocComponents:content");
        content.setAttribute("href", url, xlinkNamespace());
        content.setAttribute("storage", storageType);

        Element components = requireElement(dom, "//escidocComponents:components");
        // clone: the root still belongs to the template document
        components.addContent((Element) component.getRootElement().clone());
    }

    /**
     * Appends an {@code imageFolder} element with the given text to {@code //mpiwg:admin}.
     *
     * @param imageFolder text of the new element
     * @throws Exception if {@code //mpiwg:admin} is missing
     */
    public void addImageFolderPath(String imageFolder) throws Exception {
        Element admin = requireElement(dom, "//mpiwg:admin");
        Element images = new Element("imageFolder", EScidocTools.MPIWG);
        images.setText(imageFolder);
        admin.addContent(images);
    }

    /**
     * Appends an {@code archivePath} element with the given text to {@code //mpiwg:admin}.
     *
     * @param archivePath text of the new element
     * @throws JDOMException if {@code //mpiwg:admin} is missing
     */
    public void addArchiveFolderPath(String archivePath) throws JDOMException {
        Element admin = requireElement(dom, "//mpiwg:admin");
        Element archive = new Element("archivePath", EScidocTools.MPIWG);
        archive.setText(archivePath);
        admin.addContent(archive);
    }

    /**
     * Replaces the wrapped document with the result of parsing {@code xml}.
     * On any failure the stack trace is printed, the previous document is kept
     * and {@code false} is returned. The stored pid is not re-read.
     *
     * @param xml XML text, encoded as UTF-8 before parsing
     * @return {@code true} if the document was replaced, {@code false} otherwise
     */
    public Boolean upDateFromXML(String xml) {
        if (xml == null) {
            return Boolean.FALSE;
        }
        SAXBuilder builder = new SAXBuilder();
        try {
            ByteArrayInputStream in = new ByteArrayInputStream(xml.getBytes("utf-8"));
            dom = builder.build(in);
        } catch (JDOMException e) {
            e.printStackTrace();
            return Boolean.FALSE;
        } catch (IOException e) {
            // also covers UnsupportedEncodingException
            e.printStackTrace();
            return Boolean.FALSE;
        }
        return Boolean.TRUE;
    }

    /**
     * Adds a {@code display_url} component ({@code text/html}, {@code external-url})
     * pointing at {@code link}. A {@code null} or empty link is ignored.
     *
     * @param link href of the component, may be {@code null} or empty
     * @throws JDOMException if parsing the template fails or required nodes are missing
     * @throws IOException   if the template cannot be loaded or read
     */
    public void addDisplayUrl(String link) throws JDOMException, IOException {
        if (link == null || link.equals("")) {
            return;
        }
        InputStream componentXML = openComponentTemplate();
        try {
            addComponent(link, componentXML, "display_url", "text/html", "external-url");
        } finally {
            closeQuietly(componentXML);
        }
    }

    /**
     * Returns the {@code xlink:href} attribute value of the root element.
     *
     * @return the href value
     * @throws JDOMException if the root element has no {@code xlink:href} attribute
     */
    public String getESciDocId() throws JDOMException {
        Element root = dom.getRootElement();
        return requireAttribute(root, "./@xlink:href").getValue();
    }

    /**
     * Sets {@code xlink:href} of {@code //escidocItem:properties/srel:content-model}.
     *
     * @param href new attribute value
     * @throws JDOMException if the attribute does not exist in the document
     */
    public void setContentModel(String href) throws JDOMException {
        requireAttribute(dom, "//escidocItem:properties/srel:content-model/@xlink:href")
                .setValue(href);
    }

    /**
     * Sets {@code xlink:href} of {@code //escidocItem:properties/srel:context}.
     *
     * @param context new attribute value
     * @throws JDOMException if the attribute does not exist in the document
     */
    public void setContext(String context) throws JDOMException {
        requireAttribute(dom, "//escidocItem:properties/srel:context/@xlink:href")
                .setValue(context);
    }

    /**
     * @return the wrapped JDOM document itself (not a copy)
     */
    public Document getDocument() {
        return dom;
    }

    /** Evaluates {@code expression} on {@code context} and insists on an Element result. */
    private static Element requireElement(Object context, String expression)
            throws JDOMException {
        Object node = EScidocTools.getESciDocXpath(expression).selectSingleNode(context);
        if (!(node instanceof Element)) {
            throw new JDOMException("XPath did not select an element: " + expression);
        }
        return (Element) node;
    }

    /** Evaluates {@code expression} on {@code context} and insists on an Attribute result. */
    private static Attribute requireAttribute(Object context, String expression)
            throws JDOMException {
        Object node = EScidocTools.getESciDocXpath(expression).selectSingleNode(context);
        if (!(node instanceof Attribute)) {
            throw new JDOMException("XPath did not select an attribute: " + expression);
        }
        return (Attribute) node;
    }

    /** Namespace object for the {@code xlink} prefix used on href attributes. */
    private static Namespace xlinkNamespace() {
        return Namespace.getNamespace("xlink", EScidocTools.xlink);
    }

    /** Opens the bundled component template, failing clearly when it is not on the classpath. */
    private InputStream openComponentTemplate() throws IOException {
        InputStream template = getClass().getResourceAsStream(COMPONENT_TEMPLATE);
        if (template == null) {
            throw new IOException("Component template not found on classpath: "
                    + COMPONENT_TEMPLATE);
        }
        return template;
    }

    /** Closes a stream this class opened itself without masking an earlier exception. */
    private static void closeQuietly(InputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // nothing useful can be done; the template has already been consumed or failed
        }
    }
}