Mercurial > hg > eSciDocImport
diff src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java @ 0:c6929e63b0b8
first import
author | dwinter |
---|---|
date | Wed, 24 Nov 2010 16:52:07 +0100 |
parents | |
children | fab8e78184fa |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,457 @@ +package de.mpiwg.itgroup.eSciDoc.utils; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringBufferInputStream; +import java.io.StringWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.net.URL; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + + + +import org.w3c.dom.Attr; + +import org.xml.sax.SAXException; + +//import com.sun.org.apache.xml.internal.serialize.OutputFormat; +//import com.sun.org.apache.xml.internal.serialize.XMLSerializer; +import org.jdom.Attribute; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.Namespace; +import org.jdom.input.DOMBuilder; +import org.jdom.input.SAXBuilder; +import org.jdom.output.XMLOutputter; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.Tools.MyFileNameFilter; + +public class eSciDocXmlObject { + + + private Document dom; + private XPath xpath; + private String pid; + private DOMBuilder parser; + + public eSciDocXmlObject(String pid, InputStream template) throws Exception { + + SAXBuilder builder = new SAXBuilder(); + + dom = builder.build(template); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); + + Element test = (Element) xpath.selectSingleNode(dom); + + test.setText(pid); + this.pid=pid; + + } + +public eSciDocXmlObject(URL url) throws Exception { + + SAXBuilder builder = new SAXBuilder(); + + dom = builder.build(url); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); + + Element test = (Element) xpath.selectSingleNode(dom); + + this.pid= test.getTextTrim(); + + } + + public eSciDocXmlObject(Element el) throws JDOMException { + + dom = new Document((Element)el.clone()); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); + + Element test = (Element) xpath.selectSingleNode(dom); + if (test!=null) + this.pid= test.getTextTrim(); +} + + public void insertDC(Element mdDc) throws Exception{ + //NodeList dcList = mdDc.getFirstChild().getChildNodes(); + XPath dcXPath = EScidocTools.getESciDocXpath("//dc-md"); //todo check path to dc in escddoc templates + + Element dcContainer = (Element) dcXPath.selectSingleNode(dom); + + if (dcContainer==null) + throw new Exception(); + + for(Object ct: mdDc.getChildren()) + { + if (Element.class.isInstance(ct)){ + Element e = (Element) ct; + String name = e.getName(); + Element content = new Element(name, EScidocTools.DC); + content.setText(e.getText()); + dcContainer.addContent(content); + } + } + } + + public void insertDC(HashMap<String,String> dc) throws Exception{ + Set<String> dcList = dc.keySet(); + + XPath dcXPath = EScidocTools.getESciDocXpath("//dc-md"); //todo check path to dc in escddoc templates + + Element dcContainer = (Element) dcXPath.selectSingleNode(dom); + + for (String name : dcList){ + Element newChild = new Element(name,EScidocTools.DC); + newChild.setText(dc.get(name)); + dcContainer.addContent(newChild); + } + + } + + public void insertMeta(Element meta) throws Exception{ + if (meta==null) + return; + + XPath xp = EScidocTools.getESciDocXpath("//metadata-records:md-record[@name='mpiwg-index_meta']"); + Element dcContainer = (Element) xp.selectSingleNode(dom); + + dcContainer.addContent((Element)meta.clone()); + + } + + + public void setTitle(String title) throws Exception{ + XPath xp = EScidocTools.getESciDocXpath("//foxml:xmlContent/oai_dc:dc"); + Element dcContainer = (Element) xp.selectSingleNode(dom); + + + + Element newChild = new Element("title",EScidocTools.DC); + newChild.setText(title); + + + dcContainer.addContent(newChild); + + + + + } + + public void setReferenceFolder(String folderpath) throws Exception{ + XPath xp = EScidocTools.getESciDocXpath("//mpiwg:bundle/mpiwg:bundle-root/@path"); + Element container = (Element) xp.selectSingleNode(dom); + + container.setText(folderpath); + + } + + public void setVLPId(String id) throws Exception{ + XPath xp = EScidocTools.getESciDocXpath("//vlp:admin/vlp:identifier"); + Element container = (Element) xp.selectSingleNode(dom); + + container.setText(id); + } + /** + * @param file Pfad mit den Grundordner des Dokument + * @throws Exception +*/ + public void generateXMLIndex(File file) throws Exception{ + + + File[] files = file.listFiles(); + + //first try to find the image path + File imagefolder = null; + for (File f: files){ + + if (f.getName().equals("pageimg")) + imagefolder = f; + else if (f.getName().equals("pages")) + imagefolder = f; + } + if (imagefolder==null) + return; + + FilenameFilter filter = new MyFileNameFilter(); + + ArrayList<ArrayList<String>> tupels = new ArrayList<ArrayList<String>>(); + ArrayList<String> tmp; + for (File image : imagefolder.listFiles(filter)) + { + tmp = new ArrayList<String>(); + + tmp.add(calculateName(image)); + tmp.add(image.getName()); + + tupels.add(tmp); + } + generateXMLIndex(tupels); + } + + private void generateXMLIndex(ArrayList<ArrayList<String>> tupels) throws Exception { + // DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + //factory.setNamespaceAware(true); + //DocumentBuilder db =factory.newDocumentBuilder(); + + XPath xp = EScidocTools.getESciDocXpath("//vlp:index"); + Element index = (Element) xp.selectSingleNode(dom); + + + for (ArrayList<String> tuple: tupels ){ + + Element newChild = new Element("page",EScidocTools.VLP); + newChild.setText(tuple.get(0)); + + newChild.setAttribute("nr", tuple.get(1)); + index.addContent(newChild); + + } + + + } + + private String calculateName(File f) + { + String name = f.getName(); + String[] splitted = name.split("[.]"); + String n = splitted[0]; + String name2 = n.substring(1); + char letter = n.charAt(0); + return letter+": "+name2; + }; + + + public void setRelationship(String collection) throws Exception{ + XPath xp = EScidocTools.getESciDocXpath("//foxml:datastream[@ID='RELS-EXT']/foxml:datastreamVersion/foxml:xmlContent"); + Element xmlContent = (Element) xp.selectSingleNode(dom); + + + XPath rdfxp = EScidocTools.getESciDocXpath("//rdf:RDF"); + Element rdf = (Element) rdfxp.selectSingleNode(dom); + + + if (rdf==null) + { + rdf = new Element("RDF",EScidocTools.RDF); + + xmlContent.addContent(rdf); + } + + rdf.setAttribute("rdf", EScidocTools.RDF); + rdf.setAttribute("fedora", EScidocTools.FEDORA); + + Element rdfDescription = new Element("Description",EScidocTools.RDF); + + Namespace ns = Namespace.getNamespace("rdf",EScidocTools.RDF); + rdfDescription.setAttribute("about","info:fedora/"+pid, ns); + + Element rdfMember = new Element("isMemberOfCollection",EScidocTools.FEDORA); + rdfDescription.setAttribute("resource", collection, ns); + + rdfDescription.addContent(rdfMember); + + rdf.addContent(rdfDescription); + + + + + } + public void addWebUrl(String url) throws Exception + { + XPath xp = EScidocTools.getESciDocXpath("//escidocComponents:content[@xlink:title='web_page']"); + Element xmlContent = (Element) xp.selectSingleNode(dom); + + + Namespace ns = Namespace.getNamespace("xlink",EScidocTools.xlink); + + xmlContent.setAttribute("href", url,ns); + } + + public void addOrigUrlToMPIWGMetaData(String url) throws Exception + { + XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin/mpiwg:orig-ref"); + Element xmlContent = (Element) xp.selectSingleNode(dom); + + + Namespace ns = Namespace.getNamespace("xlink",EScidocTools.xlink); + + xmlContent.setAttribute("href", url,ns); + + } + + public void addIndexMetaUrl(String url) throws Exception + { + if (url.equals("")) + return; + + InputStream componentXML=getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml"); + addComponent(url,componentXML,"index_meta","text/xml","internal-managed"); + + + } + + + + public String printXML() throws IOException{ + XMLOutputter out = new XMLOutputter(); + + String string = out.outputString(dom); + return string; + + } + + public void addFullText(String url) throws Exception { + addFullText(url,getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml")); + + } + + public void addFullText(String url, InputStream inputStream) throws Exception { + addComponent(url, inputStream,"text","text/xml","internal-managed"); + } + + + public void addComponent(String url, InputStream inputStream, String category, String mimeType, String storageType) throws JDOMException, IOException{ + + SAXBuilder builder = new SAXBuilder(); + + Document dom2 = builder.build(inputStream); + + + XPath xp = EScidocTools.getESciDocXpath("//escidocComponents:properties/prop:content-category"); + Element item = (Element)xp.selectSingleNode(dom2); + item.setText(category); + + xp = EScidocTools.getESciDocXpath("//escidocComponents:properties/prop:mime-type"); + item = (Element)xp.selectSingleNode(dom2); + item.setText(mimeType); + + xp = EScidocTools.getESciDocXpath("//escidocComponents:content"); + item = (Element)xp.selectSingleNode(dom2); + + Namespace ns = Namespace.getNamespace("xlink",EScidocTools.xlink); + + item.setAttribute("href", url,ns); + item.setAttribute("storage",storageType); + + + xp = EScidocTools.getESciDocXpath("//escidocComponents:components"); + item = (Element)xp.selectSingleNode(dom); + + + //Node newNode = dom.importNode(dom2.getFirstChild(), true); + item.addContent((Element)dom2.getRootElement().clone()); + } + + public void addImageFolderPath(String imageFolder) throws Exception { + + XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin"); + Element item = (Element)xp.selectSingleNode(dom); + + Element images= new Element("imageFolder",EScidocTools.MPIWG); + images.setText(imageFolder); + + item.addContent(images); + + + } + + public void addArchiveFolderPath(String archivePath) throws JDOMException { + XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin"); + Element item = (Element)xp.selectSingleNode(dom); + + Element images= new Element("archivePath",EScidocTools.MPIWG); + images.setText(archivePath); + + item.addContent(images); + + } + + public Boolean upDateFromXML(String xml) { + SAXBuilder builder = new SAXBuilder(); + + + ByteArrayInputStream in; + try { + in = new ByteArrayInputStream(xml.getBytes("utf-8")); + + dom = builder.build(in); + + } catch (UnsupportedEncodingException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return false; + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return false; + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return false; + } + + return true; + + + + } + + public void addDisplayUrl(String link) throws JDOMException, IOException { + + if (link==null || link.equals("")) + return; + + InputStream componentXML=getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml"); + addComponent(link,componentXML,"display_url","text/html","external-url"); + + + + + + } + + public String getESciDocId() throws JDOMException { + + Element el = dom.getRootElement(); + + XPath xp = EScidocTools.getESciDocXpath("./@xlink:href"); + Attribute href = (Attribute) xp.selectSingleNode(el); + return href.getValue(); + } + + public void setContentModel(String href) throws JDOMException { + + XPath xp = EScidocTools.getESciDocXpath("//escidocItem:properties/srel:content-model/@xlink:href"); + Attribute item = (Attribute)xp.selectSingleNode(dom); + item.setValue(href); + + + } + + public void setContext(String context) throws JDOMException { + XPath xp = EScidocTools.getESciDocXpath("//escidocItem:properties/srel:context/@xlink:href"); + Attribute item = (Attribute)xp.selectSingleNode(dom); + item.setValue(context); + + + } + + public Document getDocument(){ + return dom; + } + + +}