Mercurial > hg > eSciDocImport
view src/de/mpiwg/itgroup/eSciDoc/Tools/ingestLib.java @ 0:c6929e63b0b8
first import
author | dwinter |
---|---|
date | Wed, 24 Nov 2010 16:52:07 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.itgroup.eSciDoc.Tools; import java.io.IOException; import java.io.StringReader; import java.net.MalformedURLException; import java.net.URL; import java.rmi.RemoteException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.rpc.ServiceException; import org.apache.axis.types.NonNegativeInteger; import org.apache.xmlrpc.XmlRpcException; import org.apache.xmlrpc.client.XmlRpcClient; import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; import org.w3c.dom.Document; import org.xml.sax.InputSource; public class ingestLib extends IngestECHO{ /** * @param args * @throws Exception * @throws Exception */ private static String ZOPEPROVIDER = "http://127.0.0.1:18080"; private static String createFoxml(String litid) throws Exception{ String pid = eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:"+pid,"vlpRessourceTemplate.xml"); //obj.setTitle("lit11111 - title"); //get DC Metadata from the Ressource XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); XmlRpcClient client = new XmlRpcClient(); Object[] params = new Object[]{}; config.setServerURL(new URL(ZOPEPROVIDER+"/metadataMain")); client.setConfig(config); params = new Object[]{"http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta"}; String result = (String) client.execute("getDCFormatted", params); System.out.println("dC:"+result); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); DocumentBuilder db =factory.newDocumentBuilder(); InputSource resultStream = new InputSource(new StringReader(result)); Document dc = db.parse(resultStream); obj.insertDC(dc); obj.setReferenceFolder("/mpiwg/online/permanent/vlp/"+litid); //obj.generateXMLIndex(new File("/mpiwg/online/permanent/vlp/"+litid)); //long date = new Date().getTime(); SimpleDateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss.S'Z'"); String dateStr = dateformat.format(new Date()); obj.addWebUrl("http://vlp.mpiwg-berlin.mpg.de/references?id="+litid, litid, dateStr); obj.addIndexMetaUrl("http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta", litid, dateStr); obj.setRelationship("info:fedora/vlp:col1"); obj.setVLPId(litid); return obj.printXML(); } private static void ingestAllVLPObjects() throws MalformedURLException, XmlRpcException { XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); XmlRpcClient client = new XmlRpcClient(); config.setServerURL(new URL("http://127.0.0.1:18080/vlp/vlp_coll/library/data")); client.setConfig(config); Object[] params = new Object[]{}; Object[] res = (Object[]) client.execute("getAllRessources", params); String string = ""; for (Object re: res){ Object[] rA = (Object [])re; String[] splitted = ((String) rA[0]).split("/"); String id = splitted[splitted.length-1]; try { String xml = createFoxml(id); String ret = ingest(xml); config.setServerURL(new URL((String)rA[1])); client.setConfig(config); params = new Object[]{ret}; client.execute("setPID", params); System.out.println(ret); } catch (Exception e) { System.err.println("cannot get:"+(String) rA[0]); } } } private static void modifyDCSet(String litid, String PID) throws XmlRpcException, ServiceException, IOException { XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); XmlRpcClient client = new XmlRpcClient(); config.setServerURL(new URL(ZOPEPROVIDER+"/metadataMain")); client.setConfig(config); Object[] params = new Object[]{"http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta"}; String result = (String) client.execute("getDCFormatted", params); //System.out.println("dC:"+result); FedoraAPIM APIM; FedoraAPIA APIA; System.setProperty("javax.net.ssl.trustStore", "/usr/local/fedora/tomcat/conf/keystore"); String baseURL = "https://127.0.0.1:8443/fedora"; FedoraClient fc = new FedoraClient(baseURL, "fedoraAdmin", "XXXX"); APIA=fc.getAPIA(); APIM=fc.getAPIM(); String content = "<oai_dc:dc xmlns:oai_dc='http://www.openarchives.org/OAI/2.0/oai_dc/' xmlns:dc='http://purl.org/dc/elements/1.1/'><dc:description>VLP Literature Object</dc:description><dc:publisher>MPIWG</dc:publisher>"; content += result; content += "</oai_dc:dc>"; //content=""; //System.out.println(content); String chksum = MD5.asHex(new MD5(content).Final()); //System.out.println(chksum); String[] em= new String[]{}; String pid = APIM.modifyDatastreamByValue(PID, "DC",em , "Dublin Core Record for this object", "text/xml", "",content.getBytes("utf-8"), null, null,"metadata changed",false); System.out.println(pid); } public static void main(String[] args) throws Exception { //ingestAllVLPObjects(); changeDCMetadata(); //modifyDCSet("lit14191","mpiwg:PR9MPM4E"); } private static void changeDCMetadata() throws MalformedURLException, ServiceException, IOException, RemoteException, XmlRpcException { System.setProperty("javax.net.ssl.trustStore", "/usr/local/fedora/tomcat/conf/keystore"); String baseURL = "https://127.0.0.1:8443/fedora"; FedoraClient fc = new FedoraClient(baseURL, "fedoraAdmin", "XXX"); FedoraAPIA APIA; APIA=fc.getAPIA(); Condition[] condition = {new Condition("pid", ComparisonOperator.has, "mpiwg:*")}; FieldSearchQuery query = new FieldSearchQuery(condition, null); NonNegativeInteger maxResults = new NonNegativeInteger("" + 200); FieldSearchResult res = APIA.findObjects(new String[]{"pid"}, maxResults, query); processResults(APIA, res); while (true) { ListSession ses = res.getListSession(); if (ses == null) break; String tok = ses.getToken(); if (tok == null) break; res = APIA.resumeFindObjects(res.getListSession().getToken()); if (res.getResultList().length>0) processResults(APIA, res); else break; } } private static void processResults(FedoraAPIA APIA, FieldSearchResult res) throws XmlRpcException, ServiceException, IOException { ObjectFields[] fields = res.getResultList(); System.out.println("found:"+fields.length); for (ObjectFields field: fields){ String pid = field.getPid(); MIMETypedStream ds = APIA.getDatastreamDissemination(pid, "vlp-admin", null); byte[] x = ds.getStream(); String s = new String(x); //System.err.println(s); Pattern p = Pattern.compile("<vlp:identifier>(lit.*)</vlp:identifier>"); Matcher m = p.matcher(s); m.find(); String lit = m.group(1); System.out.println(lit); modifyDCSet(lit, pid); } } }