/*
 * Repository view: src/de/mpiwg/itgroup/eSciDoc/Tools/ingestLib.java @ 0:c6929e63b0b8
 *
 * Commit message: first import
 * Author: dwinter
 * Date: Wed, 24 Nov 2010 16:52:07 +0100
 * Parents: (none listed)
 * Children: (none listed)
 * Viewer settings: line wrap on, line source
 */

package de.mpiwg.itgroup.eSciDoc.Tools;
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.rmi.RemoteException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.rpc.ServiceException;

import org.apache.axis.types.NonNegativeInteger;
import org.apache.xmlrpc.XmlRpcException;
import org.apache.xmlrpc.client.XmlRpcClient;
import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;


/**
 * Command-line helper for VLP literature resources ("lit..." ids).
 *
 * <p>The class offers two batch operations, selected by editing {@link #main}:
 * <ul>
 *   <li>{@code ingestAllVLPObjects()} - asks the Zope server for all resources,
 *       builds an XML object description for each one, passes it to
 *       {@code ingest(...)} and reports the returned value back to Zope via
 *       {@code setPID};</li>
 *   <li>{@code changeDCMetadata()} - searches Fedora for objects whose pid
 *       matches {@code mpiwg:*} and rewrites the {@code DC} datastream of each
 *       one from freshly fetched Dublin Core metadata.</li>
 * </ul>
 *
 * <p>All endpoints (Zope on 127.0.0.1:18080, Fedora on 127.0.0.1:8443) and the
 * trust store location are hard-coded below.
 */
public class ingestLib extends IngestECHO{

	/** Base URL of the Zope server that answers the XML-RPC calls. */
	private static final String ZOPEPROVIDER = "http://127.0.0.1:18080";

	/** Public base URL under which every VLP resource exposes its index_meta file. */
	private static final String VLP_DATA_URL = "http://vlp.mpiwg-berlin.mpg.de/library/data/";

	/** Trust store handed to JSSE before talking to Fedora over https. */
	private static final String TRUST_STORE = "/usr/local/fedora/tomcat/conf/keystore";

	/** Base URL of the Fedora repository. */
	private static final String FEDORA_BASE_URL = "https://127.0.0.1:8443/fedora";

	/** Fedora account used for all repository calls. */
	private static final String FEDORA_USER = "fedoraAdmin";

	/**
	 * Extracts the VLP id from a vlp-admin datastream. The capture stops at the
	 * next '<' so that trailing markup on the same line is never swallowed.
	 */
	private static final Pattern VLP_IDENTIFIER =
			Pattern.compile("<vlp:identifier>(lit[^<]*)</vlp:identifier>");

	/**
	 * Returns the index_meta URL of a VLP resource.
	 *
	 * @param litid VLP id of the resource, e.g. {@code lit14191}
	 * @return {@code http://vlp.mpiwg-berlin.mpg.de/library/data/<litid>/index_meta}
	 */
	private static String indexMetaUrl(String litid) {
		return VLP_DATA_URL+litid+"/index_meta";
	}

	/**
	 * Fetches the formatted Dublin Core metadata of a resource by calling the
	 * XML-RPC method {@code getDCFormatted} on {@code ZOPEPROVIDER/metadataMain}.
	 *
	 * @param litid VLP id of the resource
	 * @return the string returned by the XML-RPC call
	 * @throws MalformedURLException if the service URL cannot be parsed
	 * @throws XmlRpcException if the remote call fails
	 */
	private static String fetchDCFormatted(String litid) throws MalformedURLException, XmlRpcException {
		XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
		config.setServerURL(new URL(ZOPEPROVIDER+"/metadataMain"));
		XmlRpcClient client = new XmlRpcClient();
		client.setConfig(config);
		Object[] params = new Object[]{indexMetaUrl(litid)};
		return (String) client.execute("getDCFormatted", params);
	}

	/**
	 * Formats the current time as {@code yyyy-MM-dd'T'HH:mm:ss.SSS'Z'}.
	 *
	 * <p>The formatter is pinned to UTC because the pattern ends in a literal
	 * 'Z'; it uses the 24-hour field (HH) and three-digit milliseconds (SSS) so
	 * that the value is unambiguous.
	 *
	 * @return the current instant as a UTC timestamp string
	 */
	private static String currentTimestamp() {
		SimpleDateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
		dateformat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
		return dateformat.format(new Date());
	}

	/**
	 * Builds the XML description of one VLP resource from the template
	 * {@code vlpRessourceTemplate.xml}.
	 *
	 * <p>The object receives the resource's Dublin Core metadata, its reference
	 * folder, its web and index_meta URLs (both stamped with the current time),
	 * the relationship {@code info:fedora/vlp:col1} and the VLP id.
	 *
	 * @param litid VLP id of the resource
	 * @return the XML produced by {@code printXML()} on the populated object
	 * @throws Exception if the metadata cannot be fetched or parsed, or if the
	 *         object cannot be built
	 */
	private static String createFoxml(String litid) throws Exception{
		
		// FIXME(review): the expression that supplies the PID is missing from this
		// revision. The declaration is reproduced exactly as found and does not
		// compile until the original initializer is restored.
		String pid = 
		eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:"+pid,"vlpRessourceTemplate.xml");
		//obj.setTitle("lit11111 - title");
		
		//get DC Metadata from the Ressource
		String result = fetchDCFormatted(litid);
		System.out.println("dC:"+result);
		
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		factory.setNamespaceAware(true);
		DocumentBuilder db = factory.newDocumentBuilder();
		Document dc = db.parse(new InputSource(new StringReader(result)));
		obj.insertDC(dc);
		
		obj.setReferenceFolder("/mpiwg/online/permanent/vlp/"+litid);
		//obj.generateXMLIndex(new File("/mpiwg/online/permanent/vlp/"+litid));
		String dateStr = currentTimestamp();
		obj.addWebUrl("http://vlp.mpiwg-berlin.mpg.de/references?id="+litid, litid, dateStr);
		obj.addIndexMetaUrl(indexMetaUrl(litid), litid, dateStr);
		obj.setRelationship("info:fedora/vlp:col1");
		obj.setVLPId(litid);
		return obj.printXML();
	}

	/**
	 * Ingests every resource reported by the XML-RPC method
	 * {@code getAllRessources}.
	 *
	 * <p>Each entry of the reply is treated as an array whose first element is
	 * a path ending in the resource id and whose second element is the URL of
	 * the XML-RPC endpoint that accepts {@code setPID} for that resource. A
	 * failure for one resource is reported on stderr, together with its cause,
	 * and does not stop the remaining resources.
	 *
	 * <p>{@code ingest(...)} is not declared in this file; presumably it is
	 * inherited from {@code IngestECHO}.
	 *
	 * @throws MalformedURLException if the listing URL cannot be parsed
	 * @throws XmlRpcException if the listing call itself fails
	 */
	private static void ingestAllVLPObjects() throws MalformedURLException,
			XmlRpcException {
		XmlRpcClientConfigImpl listConfig = new XmlRpcClientConfigImpl();
		listConfig.setServerURL(new URL(ZOPEPROVIDER+"/vlp/vlp_coll/library/data"));
		XmlRpcClient client = new XmlRpcClient();
		client.setConfig(listConfig);
		Object[] resources = (Object[]) client.execute("getAllRessources", new Object[]{});
		
		for (Object resource: resources){
			Object[] entry = (Object[]) resource;
			String path = (String) entry[0];
			String[] segments = path.split("/");
			String id = segments[segments.length-1];
			
			try {
				String xml = createFoxml(id);
				String ret = ingest(xml);
				
				// Report the value returned by ingest back to the resource's own endpoint.
				XmlRpcClientConfigImpl pidConfig = new XmlRpcClientConfigImpl();
				pidConfig.setServerURL(new URL((String) entry[1]));
				client.setConfig(pidConfig);
				client.execute("setPID", new Object[]{ret});
				
				System.out.println(ret);
			} catch (Exception e) {
				// Best effort: keep going, but do not hide why this resource failed.
				System.err.println("cannot get:"+path+" ("+e+")");
			}
		}
	}

	/**
	 * Replaces the {@code DC} datastream of one Fedora object with freshly
	 * fetched Dublin Core metadata.
	 *
	 * <p>The fetched metadata is wrapped in an {@code oai_dc:dc} element that
	 * also carries a fixed description ("VLP Literature Object") and publisher
	 * ("MPIWG"). The value returned by {@code modifyDatastreamByValue} is
	 * printed to stdout.
	 *
	 * @param litid VLP id whose metadata is fetched
	 * @param PID pid of the Fedora object to modify
	 * @throws XmlRpcException if the metadata call fails
	 * @throws ServiceException if the Fedora API stub cannot be obtained
	 * @throws IOException if a URL is malformed or the Fedora call fails
	 */
	private static void modifyDCSet(String litid, String PID) throws XmlRpcException, ServiceException, IOException
	{
		String result = fetchDCFormatted(litid);
		//System.out.println("dC:"+result);
		
		System.setProperty("javax.net.ssl.trustStore", TRUST_STORE);
		
		// NOTE(review): this password literal differs from the one used in
		// changeDCMetadata(); both look like redacted placeholders - confirm.
		FedoraClient fc = new FedoraClient(FEDORA_BASE_URL, FEDORA_USER, "XXXX");
		FedoraAPIM APIM = fc.getAPIM();
		
		String content = "<oai_dc:dc xmlns:oai_dc='http://www.openarchives.org/OAI/2.0/oai_dc/' xmlns:dc='http://purl.org/dc/elements/1.1/'><dc:description>VLP Literature Object</dc:description><dc:publisher>MPIWG</dc:publisher>"
				+ result
				+ "</oai_dc:dc>";
		//System.out.println(content);
		
		String[] altIds = new String[]{};
		String modified = APIM.modifyDatastreamByValue(PID, "DC", altIds, "Dublin Core Record for this object", "text/xml", "", content.getBytes("utf-8"), null, null, "metadata changed", false);
		System.out.println(modified);
	}

	/**
	 * Entry point. Currently runs {@code changeDCMetadata()}; the commented
	 * calls are the alternative operations this tool supports.
	 *
	 * @param args ignored
	 * @throws Exception if the selected operation fails
	 */
	public static void main(String[] args) throws Exception  {
		
		//ingestAllVLPObjects();
		changeDCMetadata();
		//modifyDCSet("lit14191","mpiwg:PR9MPM4E");
	
	}

	/**
	 * Rewrites the DC datastream of every Fedora object whose pid matches
	 * {@code mpiwg:*}.
	 *
	 * <p>Results are requested 200 at a time; further pages are fetched with
	 * {@code resumeFindObjects} for as long as the result carries a list
	 * session with a token and the next page is not empty.
	 *
	 * @throws MalformedURLException if a service URL cannot be parsed
	 * @throws ServiceException if the Fedora API stub cannot be obtained
	 * @throws IOException if a Fedora call fails
	 * @throws RemoteException if a Fedora call fails remotely
	 * @throws XmlRpcException if a metadata call fails
	 */
	private static void changeDCMetadata() throws MalformedURLException,
			ServiceException, IOException, RemoteException, XmlRpcException {
		System.setProperty("javax.net.ssl.trustStore", TRUST_STORE);
		
		// NOTE(review): this password literal differs from the one used in
		// modifyDCSet(); both look like redacted placeholders - confirm.
		FedoraClient fc = new FedoraClient(FEDORA_BASE_URL, FEDORA_USER, "XXX");
		FedoraAPIA APIA = fc.getAPIA();
		
		Condition[] condition = {new Condition("pid", ComparisonOperator.has, "mpiwg:*")};
		FieldSearchQuery query = new FieldSearchQuery(condition, null);
		NonNegativeInteger maxResults = new NonNegativeInteger("200");
		
		FieldSearchResult res = APIA.findObjects(new String[]{"pid"}, maxResults, query);
		processResults(APIA, res);
		while (true)
		{
			ListSession ses = res.getListSession();
			if (ses == null)
				break;
			String tok = ses.getToken();
			if (tok == null)
				break;
			
			res = APIA.resumeFindObjects(tok);
			ObjectFields[] page = res.getResultList();
			// An absent list is treated like an empty page: nothing more to do.
			if (page == null || page.length == 0)
				break;
			processResults(APIA, res);
		}
	}

	/**
	 * Updates the DC datastream of every object in one page of search results.
	 *
	 * <p>For each object the {@code vlp-admin} datastream is read and searched
	 * for {@code <vlp:identifier>lit...</vlp:identifier>}. Objects without such
	 * an identifier are reported on stderr and skipped, so that a single
	 * non-VLP object does not abort the whole run.
	 *
	 * @param APIA Fedora access API used to read the datastream
	 * @param res one page of search results
	 * @throws XmlRpcException if a metadata call fails
	 * @throws ServiceException if the Fedora API stub cannot be obtained
	 * @throws IOException if a Fedora call fails
	 */
	private static void processResults(FedoraAPIA APIA, FieldSearchResult res)
			throws XmlRpcException, ServiceException, IOException {
		ObjectFields[] fields = res.getResultList();
		if (fields == null) {
			System.out.println("found:0");
			return;
		}
		
		System.out.println("found:"+fields.length);
		for (ObjectFields field: fields){
			String pid = field.getPid();
			MIMETypedStream ds = APIA.getDatastreamDissemination(pid, "vlp-admin", null);
			// Decode explicitly instead of relying on the platform default charset.
			// NOTE(review): assumes the datastream is UTF-8 encoded - confirm.
			String admin = new String(ds.getStream(), "UTF-8");
			//System.err.println(admin);
			
			Matcher m = VLP_IDENTIFIER.matcher(admin);
			if (!m.find()) {
				System.err.println("no vlp identifier in vlp-admin of:"+pid);
				continue;
			}
			String lit = m.group(1);
			System.out.println(lit);
			modifyDCSet(lit, pid);
		}
	}
}