view src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 0:c6929e63b0b8

first import
author dwinter
date Wed, 24 Nov 2010 16:52:07 +0100
parents
children fab8e78184fa
line wrap: on
line source

package de.mpiwg.itgroup.eSciDoc.harvesting;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;


import org.apache.http.HttpResponse;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.xml.DOMConfigurator;
import org.jdom.JDOMException;

import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
import de.mpiwg.itgroup.eSciDoc.importer.Importer;
import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
import de.mpiwg.itgroup.eSciDoc.transformer.Transformer;
import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;

/**
 * Reads objects from an {@link Importer}, converts them with a
 * {@link Transformer} and creates them as items through an
 * {@link EScidocBasicHandler}. Objects that already exist in the target
 * context can afterwards be submitted, updated and released in one pass.
 */
public class ESciDocDataHarvester {

	/** HTTP status code treated as success for submit and release requests. */
	private static final int HTTP_OK = 200;

	/** First argument handed to {@code connector.alreadyExists} when looking up a resource by its archive path. */
	private static final String ARCHIVE_PATH_FIELD = "/md-records/md-record/admin/archivePath";

	/** Comment sent along with both the submit and the release request. */
	private static final String RELEASE_COMMENT = "first release";

	protected Logger logger = Logger.getRootLogger();
	protected Importer importer;
	protected EScidocBasicHandler connector;
	protected Transformer transformer;
	private EScidocTools tools;
	private String echoContext;
	/** Receives the ids of created items and "RELEASED:" entries. */
	private Logger addedFile = Logger.getLogger("addedFilesLogger");
	/** Receives the echoUrl of every object whose item could not be created. */
	private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");

	/**
	 * Creates a harvester.
	 *
	 * @param importer    source of the objects to harvest
	 * @param transformer converts an imported object into an {@link eSciDocXmlObject}
	 * @param connector   handler used for all requests against the repository
	 * @param context     context assigned to every harvested object and used for existence checks
	 * @throws IOException kept in the signature for existing callers; nothing visible here throws it
	 */
	public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{
		this.importer=importer;
		this.transformer=transformer;
		this.connector=connector;
		this.tools=new EScidocTools(connector);
		this.echoContext= context;
	}

	/**
	 * Imports all objects of the given type and creates an item for each one
	 * that is not yet known to the repository.
	 * <p>
	 * An object is skipped when it is an {@link ECHORessource} whose archive
	 * path is already stored in the context, when that check fails, or when
	 * {@code connector.getIDfromPID} already returns an id for its PID.
	 * Ids of created items are written to the "addedFilesLogger", the echoUrl
	 * of objects whose creation failed to the "notAddedFilesLogger".
	 *
	 * @param type object type passed to {@code importer.getObjectList}
	 * @return always {@code true}
	 * @throws Exception if importing, transforming or creating an item fails
	 */
	public Boolean readObjectsFromInstance(String type) throws Exception{
		ArrayList<String> addedObjects = new ArrayList<String>();
		for (ECHOObject obj: importer.getObjectList(type)){
			if (!isNewToContext(obj)){
				continue;
			}

			obj.context=echoContext;

			String contid=connector.getIDfromPID(obj.pid,echoContext);
			if (contid!=null){
				// An id already exists for this PID, so no new item is created.
				logger.debug("------- belongsTo:"+contid);
				continue;
			}

			eSciDocXmlObject escidocItem = transformer.transform(obj);
			logger.info(escidocItem.printXML());
			// TODO write PID back to echo-obj
			Boolean result = connector.createItem(escidocItem);
			// Boolean.TRUE.equals guards against a null result, which would
			// otherwise fail with a NullPointerException on unboxing.
			if (Boolean.TRUE.equals(result)){
				String itemId = escidocItem.getESciDocId();
				addedObjects.add(itemId);
				addedFile.debug(itemId+"\n");
			} else {
				notAddedFile.debug(obj.echoUrl);
			}
		}

		// isDebugEnabled also covers TRACE/ALL and inherited levels, which a
		// plain identity comparison with Level.DEBUG does not.
		if (logger.isDebugEnabled()){
			for (String addedObject:addedObjects){
				logger.debug(addedObject);
			}
		}
		return true;
	}

	/**
	 * Decides whether an imported object may be created in the context.
	 * Only {@link ECHORessource} instances are checked; all other objects pass.
	 *
	 * @param obj imported object
	 * @return {@code false} if the archive path already exists or the check
	 *         itself failed, {@code true} otherwise
	 */
	private boolean isNewToContext(ECHOObject obj){
		if (!(obj instanceof ECHORessource)){
			return true;
		}
		ECHORessource resource = (ECHORessource)obj;
		try {
			if (connector.alreadyExists(ARCHIVE_PATH_FIELD,resource.archivePath,echoContext)){
				logger.debug("already exist:"+resource.archivePath);
				return false;
			}
			return true;
		} catch (Exception e) {
			// Best effort: an object whose existence cannot be verified is
			// skipped rather than risking a duplicate; the cause is logged.
			logger.error("already exist error:"+resource.archivePath, e);
			return false;
		}
	}

	/**
	 * Submits, updates and releases every object returned by
	 * {@code connector.getObjectListFromFilterResult(command, objectXPath)}.
	 * <p>
	 * A failed submit or update is only logged and processing of the object
	 * continues. A failed release is logged and the object is not recorded as
	 * "RELEASED:" in the "addedFilesLogger".
	 *
	 * @param command     filter command, e.g. {@code /ir/context/escidoc:40001/resources/members}
	 * @param objectXPath XPath selecting the objects in the filter result, e.g. {@code //escidocItem:item}
	 * @throws Exception if one of the repository requests fails
	 */
	public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{
		for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){
			HttpResponse res = connector.submitAnObject(obj,RELEASE_COMMENT);
			try {
				logger.debug(res.getStatusLine());
				if (res.getStatusLine().getStatusCode()!=HTTP_OK){
					logger.debug("Can not submit:"+obj.getESciDocId());
				}
			} finally {
				releaseConnection(res);
			}

			if (!connector.upDateObject(obj)){
				logger.debug("Can not update:"+obj.getESciDocId());
			}

			res = connector.releaseAnObject(obj, RELEASE_COMMENT);
			try {
				logger.debug(res.getStatusLine());
				if (res.getStatusLine().getStatusCode()!=HTTP_OK){
					logger.debug("Can not release:"+obj.getESciDocId());
					continue;
				}
				addedFile.debug("RELEASED:"+obj.getESciDocId());
			} finally {
				releaseConnection(res);
			}
		}
	}

	/**
	 * Consumes the response body so that the underlying connection is released.
	 * Responses without an entity are ignored.
	 *
	 * @param response response whose content should be consumed, may be {@code null}
	 * @throws IOException if consuming the content fails
	 */
	private static void releaseConnection(HttpResponse response) throws IOException{
		if (response!=null && response.getEntity()!=null){
			response.getEntity().consumeContent();
		}
	}

	/**
	 * Development entry point: submits and releases all members of the
	 * context {@code /ir/context/escidoc:40001}.
	 * <p>
	 * Host, port, user and password can be overridden with the system
	 * properties {@code escidoc.host}, {@code escidoc.port},
	 * {@code escidoc.user} and {@code escidoc.password}.
	 *
	 * @param args unused
	 * @throws Exception if setup or any repository request fails
	 */
	public static void main(String[] args) throws Exception{

		Logger rl = Logger.getRootLogger();
		DOMConfigurator.configure("log4uconf.xml");
		rl.setLevel(Level.DEBUG);

		// NOTE(review): the defaults below keep the previous behaviour, but a
		// credential committed to source control should be rotated and
		// supplied only through -Descidoc.password=...
		String host = System.getProperty("escidoc.host","192.168.56.2");
		int port = Integer.getInteger("escidoc.port",8080).intValue();
		String user = System.getProperty("escidoc.user","dwinter");
		String password = System.getProperty("escidoc.password","weikiki7");
		String context = "/ir/context/escidoc:40001";

		EScidocBasicHandler connector = new EScidocBasicHandler(host,port,user,password);
		ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf"));
		ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter,
				new ECHOTransformer(),connector,context);

		// Import step, currently disabled:
		//hv.readObjectsFromInstance("ECHO_collection");
		//hv.readObjectsFromInstance("ECHO_resource");

		hv.releaseAndSubmitObjects(context+"/resources/members","//escidocItem:item");

		// Further steps, currently disabled:
		//newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001");
		//hv.releaseAndSubmitObjects("/ir/containers","//container:container");
	}
}