view src/main/java/org/mpi/openmind/scripts/Indexmeta.java @ 90:4b6c0b368f46

new UpdateMpiwgDigitalizations script.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Tue, 29 May 2018 21:15:06 +0200
parents 615d27dce9b3
children
line wrap: on
line source

package org.mpi.openmind.scripts;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

import org.apache.commons.lang.StringUtils;
import org.mpi.openmind.cache.WrapperService;
import org.mpi.openmind.repository.bo.Attribute;
import org.mpi.openmind.repository.bo.Entity;
import org.mpi.openmind.repository.bo.Node;
import org.mpi.openmind.repository.services.ServiceRegistry;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

/**
 * Script that attaches two attributes to CODEX entities, driven by a
 * tab-separated input file:
 * 1. mpiwg_id - the identifier read from the document's index.meta
 * 2. indexmeta_folder - the folder that contains the index.meta
 * 
 * Each usable input line has at least three tab-separated columns:
 * index.meta folder, label, and the id of the entity to update.
 * Missing attribute definitions on CODEX are created before any entity
 * is touched.
 * 
 * @author jurzua
 *
 */
public class Indexmeta {

	//http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=/permanent/library/SWE97E52/index.meta
	private static final String ECHO_URL = "http://echo.mpiwg-berlin.mpg.de";
	
	/** Prefix of the servlet URL that serves an index.meta file; the folder path is appended to it. */
	private static final String TEXTER_URL = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=";
	
	/** Input file used when no other path is given to {@link #execute(String)} or {@link #main(String[])}. */
	private static final String DEFAULT_INPUT_FILE = 
			"/Users/jurzua/Projects/ISMI/workspace/ismi-richfaces/docs/echo/archiv_id_ismi_online_2014.05.23.tab";
	
	private static final String ATT_INDEXMETA_FOLDER = "indexmeta_folder";
	private static final String ATT_MPIWG_ID = "mpiwg_id";
	private static final String CODEX = "CODEX";
	
	/** User name passed to the wrapper service with every save performed by this script. */
	public static String userName = "indexmeta-script";
	
	
	/**
	 * Writes indexmeta_folder and mpiwg_id onto every entity named in the list
	 * and saves each entity through the wrapper service.
	 * 
	 * @param mpiwgIdList identifiers to apply, one entity per element
	 * @param ws service used to load and save the entities
	 * @throws Exception if an entity with the given id does not exist; entities
	 *         that come later in the list are then not processed
	 */
	private static void saveMpiwgIds(List<Identifier> mpiwgIdList, WrapperService ws) throws Exception{
		for(Identifier item : mpiwgIdList){
			
			System.out.println(item);
			
			Entity codex = ws.getEntityByIdWithContent(item.ismiId);
			
			if(codex == null){
				throw new Exception("Codex " + item.ismiId + " was not found.");
			}
			
			setTextAttribute(codex, ATT_INDEXMETA_FOLDER, item.indexmeta);
			setTextAttribute(codex, ATT_MPIWG_ID, item.mpiwgId);
			
			ws.saveAssertion(codex, userName);
		}
	}
	
	/**
	 * Sets the value of the named attribute on the entity, adding a new
	 * attribute of type "text" when the entity does not have one yet.
	 */
	private static void setTextAttribute(Entity entity, String name, String value){
		Attribute att = entity.getAttributeByName(name);
		if(att == null){
			entity.addAttribute(new Attribute(name, "text", value));
		}else{
			att.setValue(value);
		}
	}
	
	/**
	 * Makes sure the CODEX definition has attribute definitions for
	 * indexmeta_folder and mpiwg_id, creating whichever is missing.
	 * 
	 * @param ws service used to look up and save the definitions
	 * @throws Exception if a definition has to be created but CODEX itself is not defined
	 */
	private static void prepareDefinitions(WrapperService ws) throws Exception{
		
		Entity codex = ws.getDefinition(CODEX);

		ensureDefAttribute(ws, codex, ATT_INDEXMETA_FOLDER);
		ensureDefAttribute(ws, codex, ATT_MPIWG_ID);
	}
	
	/**
	 * Creates and saves the attribute definition called name on CODEX,
	 * unless the wrapper service already knows one.
	 */
	private static void ensureDefAttribute(WrapperService ws, Entity codexDef, String name) throws Exception{
		if(ws.getDefAttributeByOwnValue(CODEX, name) != null){
			return;
		}
		
		// Fail with a readable message instead of a NullPointerException below.
		if(codexDef == null){
			throw new Exception("Definition " + CODEX + " was not found.");
		}
		
		Attribute att = new Attribute(Node.TYPE_TBOX, "text", name);
		att.setSourceId(codexDef.getId());
		att.setSourceObjectClass(Node.TYPE_TBOX);
		att.setSourceModif(codexDef.getModificationTime());
		att.setSystemStatus(Node.SYS_STATUS_CURRENT_VERSION);
		ws.saveDefAttribute(att, userName);
	}
	
	/**
	 * Reads the tab-separated input file and returns one Identifier per valid line.
	 * 
	 * A line that cannot be turned into an Identifier (for example because its
	 * id column is not a number) is reported on System.err and skipped; the
	 * remaining lines are still read. If the file itself cannot be opened, the
	 * stack trace is printed and the list collected so far (empty) is returned.
	 * 
	 * @param path location of the input file
	 * @return the valid identifiers in file order, never null
	 */
	private static List<Identifier> readFile(String path){
		List<Identifier> rs = new ArrayList<Identifier>();
		
		Scanner scan = null;
		try{
			scan = new Scanner(new File(path));
			
			int lineNumber = 0;
			while(scan.hasNextLine()){
				String line = scan.nextLine();
				lineNumber++;
				
				try{
					Identifier id = new Identifier(line);
					if(id.isValid){
						rs.add(id);
					}
				}catch(Exception e){
					// One bad line must not discard the rest of the file.
					System.err.println("Skipping line " + lineNumber + " of " + path + ": " + e);
				}
			}
		}catch(Exception e){
			e.printStackTrace();
		}finally{
			// Release the file handle on every path.
			if(scan != null){
				scan.close();
			}
		}
		return rs;
	}

	/**
	 * One line of the input file, together with the mpiwg id looked up for it.
	 */
	public static class Identifier{
		
		/** Id of the entity to update (third column). */
		public Long ismiId;
		/** Folder of the index.meta (first column). */
		public String indexmeta;
		/** Identifier read from the index.meta; empty when none was found. */
		public String mpiwgId;
		/** Label (second column); only used by {@link #toString()}. */
		public String label;
		/** True when the line had at least three columns and a non-empty mpiwg id was found. */
		public boolean isValid = false;
		
		/**
		 * Parses a tab-separated line and fetches the mpiwg id for it over HTTP.
		 * Lines with fewer than three columns leave every field unset and
		 * isValid false.
		 * 
		 * @param line raw line from the input file
		 * @throws NumberFormatException if the third column is not a valid long
		 * @throws Exception if the XPath lookup on the index.meta fails
		 */
		public Identifier(String line) throws Exception{
			String[] split = line.split("\t");
			if(split.length >= 3){
				this.indexmeta = split[0];
				this.label = split[1];
				// Tolerate stray blanks around the numeric column.
				this.ismiId = Long.parseLong(split[2].trim());
				
				String link0 = indexmeta.replace("/mpiwg/online", "");
				String link = TEXTER_URL + link0 + "/index.meta";
				this.mpiwgId = getMpiwgId(link);
				
				// ismiId is always set here, parseLong either succeeded or threw.
				this.isValid = StringUtils.isNotEmpty(mpiwgId);
			}
		}
		
		@Override
		public String toString(){
			return label + "\t[" + indexmeta +  ", " + ismiId + ", " + mpiwgId + "]";
		}
		
	} 
	
	
	/**
	 * Runs the script against the default input file.
	 */
	public static void execute(){
		execute(DEFAULT_INPUT_FILE);
	}
	
	/**
	 * Runs the script: prepares the attribute definitions, reads the input
	 * file and saves the attributes on every listed entity. Any exception is
	 * printed and ends the run.
	 * 
	 * @param inputFile path of the tab-separated input file
	 */
	public static void execute(String inputFile){
		ServiceRegistry sr = new ServiceRegistry();
		
		try {
			prepareDefinitions(sr.getWrapper());
			saveMpiwgIds(readFile(inputFile), sr.getWrapper());
			
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Command line entry point.
	 * 
	 * @param args optional; args[0], when present, is the input file path
	 */
	public static void main(String[] args) {
		if(args != null && args.length > 0){
			execute(args[0]);
		}else{
			execute();
		}
		// NOTE(review): presumably forces the JVM down in case the service
		// registry leaves non-daemon threads running - confirm.
		System.exit(0);
	}

	/**
	 * Downloads and parses the XML document at the given URI.
	 * 
	 * NOTE(review): the parser runs with default settings, so DTDs and external
	 * entities in the fetched document are not disabled - acceptable only while
	 * the source host is trusted.
	 * 
	 * @return the parsed document, or null if fetching or parsing failed
	 *         (the stack trace is printed)
	 */
	private static Document getDocument(String link){
		try {
			return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(link);
		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}
	
	/**
	 * Reads the mpiwg identifier from the index.meta found at the given URI.
	 * 
	 * @param link URI of the index.meta document
	 * @return text of the first //resource/meta/dri element whose type is
	 *         'mpiwg'; the empty string when there is no such element or the
	 *         document could not be loaded
	 * @throws Exception if the XPath expression cannot be compiled or evaluated
	 */
	public static String getMpiwgId(String link) throws Exception{
		
		Document doc = getDocument(link);
		if(doc == null){
			// Loading failed and was already reported; treat as "no id found".
			return "";
		}

		XPath xpath = XPathFactory.newInstance().newXPath();
		
		return (String) xpath.compile("//resource/meta/dri[@type='mpiwg']").evaluate(doc, XPathConstants.STRING);
		
	}
}