diff src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java @ 0:c6929e63b0b8

first import
author dwinter
date Wed, 24 Nov 2010 16:52:07 +0100
parents
children fab8e78184fa
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/eSciDoc/utils/eSciDocXmlObject.java	Wed Nov 24 16:52:07 2010 +0100
@@ -0,0 +1,457 @@
+package de.mpiwg.itgroup.eSciDoc.utils;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringBufferInputStream;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+
+
+import org.w3c.dom.Attr;
+
+import org.xml.sax.SAXException;
+
+//import com.sun.org.apache.xml.internal.serialize.OutputFormat;
+//import com.sun.org.apache.xml.internal.serialize.XMLSerializer;
+import org.jdom.Attribute;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.Namespace;
+import org.jdom.input.DOMBuilder;
+import org.jdom.input.SAXBuilder;
+import org.jdom.output.XMLOutputter;
+import org.jdom.xpath.XPath;
+
+import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
+import de.mpiwg.itgroup.eSciDoc.Tools.MyFileNameFilter;
+
+public class eSciDocXmlObject {
+
+	
+	private Document dom;
+	private XPath xpath;
+	private String pid;
+	private DOMBuilder parser;
+	
+	/**
+	 * Builds the object from an XML template and writes the given pid into
+	 * the template's {@code prop:pid} element.
+	 *
+	 * @param pid identifier stored both in the document and in this object
+	 * @param template stream the JDOM document is parsed from
+	 * @throws Exception if the template cannot be parsed; a JDOMException is
+	 *             thrown when the template contains no {@code prop:pid} element
+	 */
+	public eSciDocXmlObject(String pid, InputStream template) throws Exception {
+		SAXBuilder builder = new SAXBuilder();
+		dom = builder.build(template);
+
+		xpath = EScidocTools.getESciDocXpath("//prop:pid");
+		Element pidElement = (Element) xpath.selectSingleNode(dom);
+		if (pidElement == null) {
+			// without this check the failure surfaced as a bare NullPointerException
+			throw new JDOMException("template contains no prop:pid element, cannot set pid " + pid);
+		}
+
+		pidElement.setText(pid);
+		this.pid = pid;
+	}
+	
+/**
+ * Builds the object from the XML document found at the given URL and reads
+ * the pid from its {@code prop:pid} element.
+ * If the document has no such element the pid stays {@code null}, which is
+ * the same behaviour as the constructor taking an Element.
+ *
+ * @param url location the JDOM document is parsed from
+ * @throws Exception if the document cannot be retrieved or parsed
+ */
+public eSciDocXmlObject(URL url) throws Exception {
+	SAXBuilder builder = new SAXBuilder();
+	dom = builder.build(url);
+
+	xpath = EScidocTools.getESciDocXpath("//prop:pid");
+	Element pidElement = (Element) xpath.selectSingleNode(dom);
+	if (pidElement != null) {
+		// a missing element used to end in a NullPointerException here
+		this.pid = pidElement.getTextTrim();
+	}
+}
+	
+	/**
+	 * Builds the object around a clone of the given element, which becomes the
+	 * root of a new document. The pid is taken from the first {@code prop:pid}
+	 * element if there is one; otherwise it is left unset.
+	 *
+	 * @param el element that is cloned and used as document root
+	 * @throws JDOMException if the XPath lookup fails
+	 */
+	public eSciDocXmlObject(Element el) throws JDOMException {
+		Element rootCopy = (Element) el.clone();
+		dom = new Document(rootCopy);
+		xpath = EScidocTools.getESciDocXpath("//prop:pid");
+
+		Element pidElement = (Element) xpath.selectSingleNode(dom);
+		if (pidElement == null) {
+			return;
+		}
+		this.pid = pidElement.getTextTrim();
+	}
+
+	/**
+	 * Copies the child elements of the given element into the {@code dc-md}
+	 * container of this document. For every child a new element with the same
+	 * local name in the DC namespace is created; only the text of the child is
+	 * copied, not its attributes or nested elements.
+	 *
+	 * @param mdDc element whose children are copied
+	 * @throws Exception if the document has no {@code dc-md} element
+	 */
+	public void insertDC(Element mdDc) throws Exception{
+		XPath dcXPath = EScidocTools.getESciDocXpath("//dc-md"); //todo check path to dc in escddoc templates
+		Element dcContainer = (Element) dcXPath.selectSingleNode(dom);
+
+		if (dcContainer == null) {
+			// same exception type as before, but now it says what is wrong
+			throw new Exception("document contains no dc-md element to insert DC metadata into");
+		}
+
+		for (Object child : mdDc.getChildren()) {
+			if (child instanceof Element) {
+				Element source = (Element) child;
+				Element content = new Element(source.getName(), EScidocTools.DC);
+				content.setText(source.getText());
+				dcContainer.addContent(content);
+			}
+		}
+	}
+	
+	/**
+	 * Adds one element in the DC namespace per map entry to the {@code dc-md}
+	 * container of this document; the key is used as element name, the value
+	 * as element text.
+	 *
+	 * @param dc element names mapped to their text content
+	 * @throws Exception if the document has no {@code dc-md} element
+	 */
+	public void insertDC(HashMap<String,String> dc) throws Exception{
+		XPath dcXPath = EScidocTools.getESciDocXpath("//dc-md"); //todo check path to dc in escddoc templates
+		Element dcContainer = (Element) dcXPath.selectSingleNode(dom);
+
+		if (dcContainer == null) {
+			// the overload taking an Element checks this too; without the check
+			// the loop below failed with a NullPointerException
+			throw new Exception("document contains no dc-md element to insert DC metadata into");
+		}
+
+		for (String name : dc.keySet()) {
+			Element newChild = new Element(name, EScidocTools.DC);
+			newChild.setText(dc.get(name));
+			dcContainer.addContent(newChild);
+		}
+	}
+
+		/**
+		 * Appends a clone of the given element to the md-record named
+		 * {@code mpiwg-index_meta}. Does nothing when {@code meta} is null.
+		 *
+		 * @param meta element to copy into the md-record, may be null
+		 * @throws Exception if the XPath lookup fails; a JDOMException is
+		 *             thrown when the document has no such md-record
+		 */
+		public void insertMeta(Element meta) throws Exception{
+			if (meta == null) {
+				return;
+			}
+
+			XPath xp = EScidocTools.getESciDocXpath("//metadata-records:md-record[@name='mpiwg-index_meta']");
+			Element mdRecord = (Element) xp.selectSingleNode(dom);
+			if (mdRecord == null) {
+				// used to fail with a NullPointerException
+				throw new JDOMException("document contains no md-record named mpiwg-index_meta");
+			}
+
+			mdRecord.addContent((Element) meta.clone());
+		}
+	
+	
+	/**
+	 * Appends a {@code title} element in the DC namespace to the
+	 * {@code oai_dc:dc} element inside {@code foxml:xmlContent}.
+	 * An already existing title is not replaced; every call adds another one.
+	 *
+	 * @param title text of the new title element
+	 * @throws Exception if the XPath lookup fails; a JDOMException is thrown
+	 *             when the document has no {@code oai_dc:dc} element
+	 */
+	public void setTitle(String title) throws Exception{
+		XPath xp = EScidocTools.getESciDocXpath("//foxml:xmlContent/oai_dc:dc");
+		Element dcContainer = (Element) xp.selectSingleNode(dom);
+		if (dcContainer == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("document contains no foxml:xmlContent/oai_dc:dc element");
+		}
+
+		Element newChild = new Element("title", EScidocTools.DC);
+		newChild.setText(title);
+		dcContainer.addContent(newChild);
+	}
+	
+	/**
+	 * Sets the {@code path} attribute of {@code mpiwg:bundle/mpiwg:bundle-root}.
+	 *
+	 * @param folderpath new value of the path attribute
+	 * @throws Exception if the XPath lookup fails; a JDOMException is thrown
+	 *             when the document has no such attribute
+	 */
+	public void setReferenceFolder(String folderpath) throws Exception{
+		XPath xp = EScidocTools.getESciDocXpath("//mpiwg:bundle/mpiwg:bundle-root/@path");
+		// The expression ends in "@path", so the selected node is an Attribute.
+		// Casting it to Element always ended in a ClassCastException.
+		Attribute path = (Attribute) xp.selectSingleNode(dom);
+		if (path == null) {
+			throw new JDOMException("document contains no mpiwg:bundle/mpiwg:bundle-root/@path attribute");
+		}
+
+		path.setValue(folderpath);
+	}
+	
+	/**
+	 * Sets the text of the {@code vlp:admin/vlp:identifier} element.
+	 *
+	 * @param id new text of the identifier element
+	 * @throws Exception if the XPath lookup fails; a JDOMException is thrown
+	 *             when the document has no such element
+	 */
+	public void setVLPId(String id) throws Exception{
+		XPath xp = EScidocTools.getESciDocXpath("//vlp:admin/vlp:identifier");
+		Element identifier = (Element) xp.selectSingleNode(dom);
+		if (identifier == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("document contains no vlp:admin/vlp:identifier element");
+		}
+
+		identifier.setText(id);
+	}
+	/**
+	 * @param file Pfad mit den Grundordner des Dokument	
+	 * @throws Exception 
+*/
+	public void generateXMLIndex(File file) throws Exception{
+		
+	
+		File[] files = file.listFiles();
+		
+		//first try to find the image path
+		File imagefolder = null;
+		for (File f: files){
+			
+			if (f.getName().equals("pageimg"))
+				imagefolder = f;
+			else if (f.getName().equals("pages"))
+				imagefolder = f;
+		}
+		if (imagefolder==null)
+			return;
+		
+		FilenameFilter filter = new MyFileNameFilter();
+		
+		ArrayList<ArrayList<String>> tupels = new ArrayList<ArrayList<String>>();
+		ArrayList<String> tmp;
+		for (File image : imagefolder.listFiles(filter))
+		{
+			tmp = new ArrayList<String>();
+			
+			tmp.add(calculateName(image));
+			tmp.add(image.getName());
+			
+			tupels.add(tmp);
+		}
+		generateXMLIndex(tupels);
+	}
+	
+	  /**
+	   * Appends one {@code page} element in the VLP namespace per tuple to the
+	   * {@code vlp:index} element. Entry 0 of a tuple becomes the element
+	   * text, entry 1 the value of the {@code nr} attribute.
+	   *
+	   * @param tupels list of two-entry lists as described above
+	   * @throws Exception if the XPath lookup fails; a JDOMException is
+	   *             thrown when the document has no {@code vlp:index} element
+	   */
+	  private void generateXMLIndex(ArrayList<ArrayList<String>> tupels) throws Exception {
+		XPath xp = EScidocTools.getESciDocXpath("//vlp:index");
+		Element index = (Element) xp.selectSingleNode(dom);
+		if (index == null) {
+			// used to fail with a NullPointerException on the first tuple
+			throw new JDOMException("document contains no vlp:index element");
+		}
+
+		for (ArrayList<String> tuple : tupels) {
+			Element page = new Element("page", EScidocTools.VLP);
+			page.setText(tuple.get(0));
+			page.setAttribute("nr", tuple.get(1));
+			index.addContent(page);
+		}
+	  }
+
+	/**
+	 * Derives a display name from a file name: the part before the first dot
+	 * is split into its first character and the rest, joined by ": "
+	 * (for example {@code p0001.jpg} gives {@code p: 0001}).
+	 * File names without any text before the first dot are returned unchanged.
+	 *
+	 * @param f file whose name is converted
+	 * @return the converted name
+	 */
+	private String calculateName(File f)
+	{
+		String name = f.getName();
+		String[] parts = name.split("[.]");
+		// ".hidden" yields an empty first part and "..." yields no parts at
+		// all; both used to end in an index-out-of-bounds exception
+		if (parts.length == 0 || parts[0].length() == 0) {
+			return name;
+		}
+
+		String base = parts[0];
+		char letter = base.charAt(0);
+		String rest = base.substring(1);
+		return letter + ": " + rest;
+	}
+
+
+	/**
+	 * Adds an {@code rdf:Description} about {@code info:fedora/<pid>} with an
+	 * {@code isMemberOfCollection} child pointing to the given collection.
+	 * The description is appended to the document's {@code rdf:RDF} element;
+	 * if there is none yet, one is created inside the {@code xmlContent} of
+	 * the RELS-EXT datastream.
+	 *
+	 * @param collection value of the {@code rdf:resource} attribute of the
+	 *            {@code isMemberOfCollection} element
+	 * @throws Exception if an XPath lookup fails; a JDOMException is thrown
+	 *             when rdf:RDF has to be created but the document has no
+	 *             RELS-EXT xmlContent element
+	 */
+	public void setRelationship(String collection) throws Exception{
+		XPath xp = EScidocTools.getESciDocXpath("//foxml:datastream[@ID='RELS-EXT']/foxml:datastreamVersion/foxml:xmlContent");
+		Element xmlContent = (Element) xp.selectSingleNode(dom);
+
+		XPath rdfxp = EScidocTools.getESciDocXpath("//rdf:RDF");
+		Element rdf = (Element) rdfxp.selectSingleNode(dom);
+
+		if (rdf == null) {
+			if (xmlContent == null) {
+				// used to fail with a NullPointerException
+				throw new JDOMException("document contains no RELS-EXT xmlContent element to hold rdf:RDF");
+			}
+			rdf = new Element("RDF", EScidocTools.RDF);
+			xmlContent.addContent(rdf);
+		}
+
+		// NOTE(review): these are plain attributes named "rdf" and "fedora",
+		// not xmlns declarations - kept as is, confirm that this is intended.
+		rdf.setAttribute("rdf", EScidocTools.RDF);
+		rdf.setAttribute("fedora", EScidocTools.FEDORA);
+
+		Namespace ns = Namespace.getNamespace("rdf", EScidocTools.RDF);
+
+		Element rdfDescription = new Element("Description", EScidocTools.RDF);
+		rdfDescription.setAttribute("about", "info:fedora/" + pid, ns);
+
+		Element rdfMember = new Element("isMemberOfCollection", EScidocTools.FEDORA);
+		// rdf:resource belongs to the relationship element. It used to be set
+		// on rdf:Description, which left isMemberOfCollection without target.
+		rdfMember.setAttribute("resource", collection, ns);
+
+		rdfDescription.addContent(rdfMember);
+		rdf.addContent(rdfDescription);
+	}
+	/**
+	 * Sets the {@code xlink:href} attribute of the component content whose
+	 * {@code xlink:title} is {@code web_page}.
+	 *
+	 * @param url new value of the href attribute
+	 * @throws Exception if the XPath lookup fails; a JDOMException is thrown
+	 *             when the document has no such content element
+	 */
+	public void addWebUrl(String url) throws Exception
+	{
+		XPath xp = EScidocTools.getESciDocXpath("//escidocComponents:content[@xlink:title='web_page']");
+		Element content = (Element) xp.selectSingleNode(dom);
+		if (content == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("document contains no escidocComponents:content titled web_page");
+		}
+
+		Namespace ns = Namespace.getNamespace("xlink", EScidocTools.xlink);
+		content.setAttribute("href", url, ns);
+	}
+	
+	/**
+	 * Sets the {@code xlink:href} attribute of the
+	 * {@code mpiwg:admin/mpiwg:orig-ref} element.
+	 *
+	 * @param url new value of the href attribute
+	 * @throws Exception if the XPath lookup fails; a JDOMException is thrown
+	 *             when the document has no such element
+	 */
+	public void addOrigUrlToMPIWGMetaData(String url) throws Exception
+	{
+		XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin/mpiwg:orig-ref");
+		Element origRef = (Element) xp.selectSingleNode(dom);
+		if (origRef == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("document contains no mpiwg:admin/mpiwg:orig-ref element");
+		}
+
+		Namespace ns = Namespace.getNamespace("xlink", EScidocTools.xlink);
+		origRef.setAttribute("href", url, ns);
+	}
+
+	/**
+	 * Adds a component of category {@code index_meta} (mime type
+	 * {@code text/xml}, storage {@code internal-managed}) that points to the
+	 * given url. Does nothing when the url is null or empty.
+	 *
+	 * @param url location of the index meta file, may be null or empty
+	 * @throws Exception if the component template is missing or cannot be
+	 *             parsed, or if the component cannot be inserted
+	 */
+	public void addIndexMetaUrl(String url) throws Exception
+	{
+		// null is treated like the empty string, as addDisplayUrl does
+		if (url == null || url.equals(""))
+			return;
+
+		String template = "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml";
+		InputStream componentXML = getClass().getResourceAsStream(template);
+		if (componentXML == null)
+			throw new IOException("component template not found on classpath: " + template);
+
+		try {
+			addComponent(url, componentXML, "index_meta", "text/xml", "internal-managed");
+		} finally {
+			// the stream was opened here, so it is released here
+			componentXML.close();
+		}
+	}
+
+		
+	
+	/**
+	 * Serializes the document of this object with a default
+	 * {@code XMLOutputter}.
+	 *
+	 * @return the document as XML text
+	 * @throws IOException part of the declared signature
+	 */
+	public String printXML() throws IOException{
+		return new XMLOutputter().outputString(dom);
+	}
+
+	public void addFullText(String url) throws Exception {
+		addFullText(url,getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml"));
+		
+	}
+	
+	/**
+	 * Adds a component of category {@code text} with mime type
+	 * {@code text/xml} and storage {@code internal-managed}, built from the
+	 * given component template.
+	 *
+	 * @param url location of the full text
+	 * @param inputStream component template the new component is parsed from
+	 * @throws Exception if the template cannot be parsed or the component
+	 *             cannot be inserted
+	 */
+	public void addFullText(String url, InputStream inputStream) throws Exception {
+		final String category = "text";
+		final String mimeType = "text/xml";
+		final String storageType = "internal-managed";
+		addComponent(url, inputStream, category, mimeType, storageType);
+	}
+	
+
+	/**
+	 * Parses a component template, fills in category, mime type, href and
+	 * storage type, and appends a clone of the result to the
+	 * {@code escidocComponents:components} element of this document.
+	 * The input stream is not closed by this method.
+	 *
+	 * @param url value of the {@code xlink:href} attribute of the content element
+	 * @param inputStream component template
+	 * @param category text of {@code prop:content-category}
+	 * @param mimeType text of {@code prop:mime-type}
+	 * @param storageType value of the {@code storage} attribute of the content element
+	 * @throws JDOMException if the template cannot be parsed or an expected
+	 *             element is missing in the template or in this document
+	 * @throws IOException if the template cannot be read
+	 */
+	public void addComponent(String url, InputStream inputStream, String category, String mimeType, String storageType) throws JDOMException, IOException{
+		SAXBuilder builder = new SAXBuilder();
+		Document component = builder.build(inputStream);
+
+		selectRequiredElement(component, "//escidocComponents:properties/prop:content-category").setText(category);
+		selectRequiredElement(component, "//escidocComponents:properties/prop:mime-type").setText(mimeType);
+
+		Element content = selectRequiredElement(component, "//escidocComponents:content");
+		Namespace ns = Namespace.getNamespace("xlink", EScidocTools.xlink);
+		content.setAttribute("href", url, ns);
+		content.setAttribute("storage", storageType);
+
+		// the target is looked up last, so this document is untouched if
+		// anything above fails
+		Element components = selectRequiredElement(dom, "//escidocComponents:components");
+		components.addContent((Element) component.getRootElement().clone());
+	}
+
+	/**
+	 * Returns the first element matching the path, or throws a JDOMException
+	 * naming the path instead of letting the caller run into a null.
+	 */
+	private static Element selectRequiredElement(Document doc, String path) throws JDOMException {
+		Element found = (Element) EScidocTools.getESciDocXpath(path).selectSingleNode(doc);
+		if (found == null) {
+			throw new JDOMException("no element matches " + path);
+		}
+		return found;
+	}
+
+	/**
+	 * Appends an {@code imageFolder} element in the MPIWG namespace with the
+	 * given text to the {@code mpiwg:admin} element.
+	 *
+	 * @param imageFolder text of the new element
+	 * @throws Exception if the XPath lookup fails; a JDOMException is thrown
+	 *             when the document has no {@code mpiwg:admin} element
+	 */
+	public void addImageFolderPath(String imageFolder) throws Exception {
+		XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin");
+		Element admin = (Element) xp.selectSingleNode(dom);
+		if (admin == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("document contains no mpiwg:admin element");
+		}
+
+		Element images = new Element("imageFolder", EScidocTools.MPIWG);
+		images.setText(imageFolder);
+		admin.addContent(images);
+	}
+
+	/**
+	 * Appends an {@code archivePath} element in the MPIWG namespace with the
+	 * given text to the {@code mpiwg:admin} element.
+	 *
+	 * @param archivePath text of the new element
+	 * @throws JDOMException if the XPath lookup fails or the document has no
+	 *             {@code mpiwg:admin} element
+	 */
+	public void addArchiveFolderPath(String archivePath) throws JDOMException {
+		XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin");
+		Element admin = (Element) xp.selectSingleNode(dom);
+		if (admin == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("document contains no mpiwg:admin element");
+		}
+
+		Element archive = new Element("archivePath", EScidocTools.MPIWG);
+		archive.setText(archivePath);
+		admin.addContent(archive);
+	}
+
+	/**
+	 * Replaces the document of this object by the one parsed from the given
+	 * XML text. On failure the stack trace is printed and the current
+	 * document is kept. The pid stored in this object is not re-read from the
+	 * new document.
+	 *
+	 * @param xml XML text; it is handed to the parser as UTF-8 bytes
+	 * @return true if the text was parsed and the document replaced, false
+	 *         if the text is null or could not be parsed
+	 */
+	public Boolean upDateFromXML(String xml)  {
+		if (xml == null) {
+			// reported like any other unusable input instead of an NPE
+			return Boolean.FALSE;
+		}
+
+		SAXBuilder builder = new SAXBuilder();
+		try {
+			ByteArrayInputStream in = new ByteArrayInputStream(xml.getBytes("utf-8"));
+			dom = builder.build(in);
+		} catch (JDOMException e) {
+			e.printStackTrace();
+			return Boolean.FALSE;
+		} catch (IOException e) {
+			// also covers UnsupportedEncodingException, which is an IOException
+			e.printStackTrace();
+			return Boolean.FALSE;
+		}
+
+		return Boolean.TRUE;
+	}
+
+	/**
+	 * Adds a component of category {@code display_url} (mime type
+	 * {@code text/html}, storage {@code external-url}) that points to the
+	 * given link. Does nothing when the link is null or empty.
+	 *
+	 * @param link url to display, may be null or empty
+	 * @throws JDOMException if the component template cannot be parsed or the
+	 *             component cannot be inserted
+	 * @throws IOException if the component template is missing or cannot be read
+	 */
+	public void addDisplayUrl(String link) throws JDOMException, IOException {
+		if (link == null || link.equals(""))
+			return;
+
+		String template = "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml";
+		InputStream componentXML = getClass().getResourceAsStream(template);
+		if (componentXML == null)
+			throw new IOException("component template not found on classpath: " + template);
+
+		try {
+			addComponent(link, componentXML, "display_url", "text/html", "external-url");
+		} finally {
+			// the stream was opened here, so it is released here
+			componentXML.close();
+		}
+	}
+
+	/**
+	 * Returns the value of the {@code xlink:href} attribute of the root
+	 * element.
+	 *
+	 * @return value of the root element's href attribute
+	 * @throws JDOMException if the XPath lookup fails or the root element has
+	 *             no {@code xlink:href} attribute
+	 */
+	public String getESciDocId() throws JDOMException {
+		Element root = dom.getRootElement();
+
+		XPath xp = EScidocTools.getESciDocXpath("./@xlink:href");
+		Attribute href = (Attribute) xp.selectSingleNode(root);
+		if (href == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("root element has no xlink:href attribute");
+		}
+		return href.getValue();
+	}
+
+	/**
+	 * Sets the {@code xlink:href} attribute of {@code srel:content-model} in
+	 * the item properties.
+	 *
+	 * @param href new value of the attribute
+	 * @throws JDOMException if the XPath lookup fails or the document has no
+	 *             such attribute
+	 */
+	public void setContentModel(String href) throws JDOMException {
+		XPath xp = EScidocTools.getESciDocXpath("//escidocItem:properties/srel:content-model/@xlink:href");
+		Attribute contentModel = (Attribute) xp.selectSingleNode(dom);
+		if (contentModel == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("document contains no srel:content-model/@xlink:href attribute");
+		}
+		contentModel.setValue(href);
+	}
+
+	/**
+	 * Sets the {@code xlink:href} attribute of {@code srel:context} in the
+	 * item properties.
+	 *
+	 * @param context new value of the attribute
+	 * @throws JDOMException if the XPath lookup fails or the document has no
+	 *             such attribute
+	 */
+	public void setContext(String context) throws JDOMException {
+		XPath xp = EScidocTools.getESciDocXpath("//escidocItem:properties/srel:context/@xlink:href");
+		Attribute contextRef = (Attribute) xp.selectSingleNode(dom);
+		if (contextRef == null) {
+			// used to fail with a NullPointerException
+			throw new JDOMException("document contains no srel:context/@xlink:href attribute");
+		}
+		contextRef.setValue(context);
+	}
+
+	/**
+	 * Gives access to the JDOM document held by this object. The instance
+	 * itself is returned, not a copy.
+	 *
+	 * @return the document of this object
+	 */
+	public Document getDocument(){
+		return this.dom;
+	}
+
+
+}