view src/main/java/de/mpiwg/indexmeta/IndexMetaParser.java @ 8:9ce7979fd037

Implementation Data Provider
author Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
date Wed, 24 Apr 2013 10:34:23 +0200
parents 7d231e4e86e5
children
line wrap: on
line source

package de.mpiwg.indexmeta;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.lang.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import de.mpiwg.indexmeta.bo.Contextualization;

/**
 * Collects {@link Contextualization} items from annotated index.meta documents.
 *
 * <p>A "resource" is a directory that contains a file named {@link #indexMetaFile}
 * and, optionally, a file named {@link #annotatedIndexMetaFile}. When the annotated
 * file is missing it is produced through {@code AnnotateIndexMeta.xmlParse}. The
 * annotated document is then searched for the {@code Contextualization.AUTHOR} and
 * {@code Contextualization.CITY} elements.
 *
 * <p>All entry points are best-effort: failures are reported on the console and the
 * items gathered so far are returned.
 */
public class IndexMetaParser {

	/** Name of the plain index.meta file looked up inside a resource directory. */
	public static String indexMetaFile = "index.meta";
	/** Name of the annotated index.meta file looked up inside a resource directory. */
	public static String annotatedIndexMetaFile = "index.meta.annot";

	/** Directory scanned by {@link #main(String[])} when no argument is supplied. */
	private static final String DEFAULT_LIBRARY_DIRECTORY = "/Users/jurzua/Projects/max-planck/index_meta/library";

	/**
	 * Collects the contextualization items of every resource directory found
	 * directly below the given directory.
	 *
	 * @param directory path of the directory whose sub-directories are resources
	 * @return the items found; empty (never {@code null}) when nothing could be read
	 */
	public static List<Contextualization> getCtxFromDirectory(String directory){
		List<Contextualization> rs = new ArrayList<Contextualization>();

		try {
			rs = getCtxItemsFromDirectory(new File(directory), rs);
		} catch (Exception e) {
			// Best-effort boundary: report the problem and return what was collected.
			e.printStackTrace();
		}

		return rs;
	}

	/**
	 * Collects the contextualization items of a single resource directory.
	 *
	 * @param indexMetaResource path of the resource directory
	 * @return the items found; empty (never {@code null}) when nothing could be read
	 */
	public static List<Contextualization> getCtxFromResource(String indexMetaResource){
		List<Contextualization> rs = new ArrayList<Contextualization>();

		try {
			rs = getCtxItemsFromIndexMetaResource(new File(indexMetaResource), rs);
		} catch (Exception e) {
			// Best-effort boundary: report the problem and return what was collected.
			e.printStackTrace();
		}

		return rs;
	}

	/**
	 * Processes every sub-directory of {@code directory} as a resource.
	 *
	 * @param directory directory to scan; plain files inside it are ignored
	 * @param rs list the found items are appended to
	 * @return {@code rs}
	 * @throws IOException if the canonical path of {@code directory} cannot be resolved
	 */
	private static List<Contextualization> getCtxItemsFromDirectory(File directory, List<Contextualization> rs) throws IOException{
		System.out.println(directory.getCanonicalPath());

		// listFiles() yields null (not an empty array) for a non-directory or on an I/O error.
		File[] entries = directory.listFiles();
		if(entries == null){
			System.err.println("Cannot list directory: " + directory);
			return rs;
		}

		for(File indexMetaResource : entries){
			if(indexMetaResource.isDirectory()){
				rs = getCtxItemsFromIndexMetaResource(indexMetaResource, rs);
			}
		}
		return rs;
	}

	/**
	 * Locates (or creates) the annotated index.meta file of one resource directory
	 * and appends the items it contains to {@code rs}.
	 *
	 * @param indexMetaResource the resource directory; its name becomes the index meta id
	 * @param rs list the found items are appended to
	 * @return {@code rs}
	 */
	private static List<Contextualization> getCtxItemsFromIndexMetaResource(File indexMetaResource, List<Contextualization> rs){

		System.out.print("Working on: " + indexMetaResource.getName());

		File annotatedFile = null;
		// A null array means the resource could not be listed; treat it like "nothing found".
		File[] plainFiles = indexMetaResource.listFiles(indexMetaFilter);

		if(plainFiles != null && plainFiles.length != 0){
			System.out.print("\tFound: " + indexMetaFile);
			File[] annotatedFiles = indexMetaResource.listFiles(annotatedIndexMetaFilter);
			if(annotatedFiles == null || annotatedFiles.length == 0){
				// No annotated file yet: create it from the plain index.meta.
				System.out.print("\tAnnotated no found");
				try{
					annotatedFile = AnnotateIndexMeta.xmlParse(plainFiles[0].getAbsolutePath(), null);
				}catch (Exception e) {
					e.printStackTrace();
				}
			}else{
				annotatedFile = annotatedFiles[0];
			}
		}

		if(annotatedFile != null){
			rs = getCtxItemsFromFile(annotatedFile, rs, indexMetaResource.getName());
		}else{
			// Nothing to parse (no index.meta, or the annotation step failed).
			System.out.print("\tSkipped: no annotated file available");
		}

		System.out.println();

		return rs;
	}

	/**
	 * Parses an annotated index.meta file and appends one item per AUTHOR element,
	 * followed by one item per CITY element, to {@code rs}.
	 *
	 * @param file the XML file to parse; {@code null} is ignored
	 * @param rs list the found items are appended to
	 * @param indexMetaId id stored on every created item
	 * @return {@code rs}
	 */
	private static List<Contextualization> getCtxItemsFromFile(File file, List<Contextualization> rs, String indexMetaId){
		if(file == null){
			return rs;
		}

		try {
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
			DocumentBuilder builder = factory.newDocumentBuilder();
			Document doc = builder.parse(file);

			// Only these two element types are extracted; authors come first, then cities.
			String[] tagNames = { Contextualization.AUTHOR, Contextualization.CITY };
			for(String tagName : tagNames){
				for(Node node : getNodeList(doc, tagName)){
					Contextualization ctx = getCtx(node, indexMetaId);
					if(ctx != null){
						rs.add(ctx);
					}
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		return rs;
	}

	/**
	 * Returns every node of {@code doc} matched by the XPath expression
	 * {@code "//" + tagName}.
	 *
	 * @param doc document to search
	 * @param tagName element name; it is concatenated into the XPath expression as is
	 * @return the matching nodes in the order the XPath result lists them; empty when
	 *         nothing matches or the evaluation fails (the failure is printed)
	 */
	public static List<Node> getNodeList(Document doc, String tagName){
		List<Node> rs = new ArrayList<Node>();

		try {
			XPath xpath = XPathFactory.newInstance().newXPath();
			XPathExpression expr = xpath.compile("//" + tagName);

			NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
			for (int i = 0; i < nodes.getLength(); i++) {
				rs.add(nodes.item(i));
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		return rs;
	}

	/**
	 * Builds a {@link Contextualization} from an element node.
	 *
	 * <p>The type is the element's node name, the element id is the value of its
	 * {@code context-id} attribute, and the content is the node value of the
	 * element's first child only (left unset when the element has no children).
	 *
	 * @param node the node to convert
	 * @param indexMetaId id stored on the created item
	 * @return the created item, or {@code null} when {@code node} is not an
	 *         {@link Element} or the conversion fails (the failure is printed)
	 */
	public static Contextualization getCtx(Node node, String indexMetaId){
		if(!(node instanceof Element)){
			return null;
		}

		try{
			Element elem = (Element)node;
			Contextualization ctx = new Contextualization();
			ctx.setIndexMetaId(indexMetaId);
			ctx.setType(elem.getNodeName());
			ctx.setElementId(elem.getAttribute("context-id"));

			Node child = elem.getFirstChild();
			if(child != null){
				ctx.setContent(child.getNodeValue());
			}
			return ctx;
		}catch (Exception e) {
			e.printStackTrace();
		}

		return null;
	}

	/**
	 * Prints every contextualization item found below a library directory.
	 *
	 * @param args optional; {@code args[0]} is the directory to scan. Without it the
	 *             built-in default directory is used.
	 */
	public static void main(String[] args){
		String directory = (args != null && args.length > 0) ? args[0] : DEFAULT_LIBRARY_DIRECTORY;

		List<Contextualization> rs = getCtxFromDirectory(directory);

		for(Contextualization ctx : rs){
			System.out.println(ctx.toString());
		}
	}

	/**
	 * Debug helper that prints name and value details of every element in an
	 * XPath node-set result.
	 *
	 * @param result the XPath result; must be a {@link NodeList}, {@code null} prints nothing
	 */
	public static void printXpathResult(Object result){
		if(result == null){
			return;
		}

		NodeList nodes = (NodeList) result;
		for (int i = 0; i < nodes.getLength(); i++) {
			Node node = nodes.item(i);
			if(node instanceof Element){
				// An empty element has no first child; print null instead of failing.
				Node firstChild = node.getFirstChild();
				String firstChildValue = (firstChild != null) ? firstChild.getNodeValue() : null;

				System.out.println("Local Name= " + node.getLocalName());
				System.out.println("Value= " + node.getNodeValue());
				System.out.println("Name= " + node.getNodeName());
				System.out.println("getFirstChild value= " + firstChildValue);
				System.out.println(node);
			}
		}
	}

	/** Accepts only files whose name equals the current value of {@link #indexMetaFile}. */
	private static final FilenameFilter indexMetaFilter = new FilenameFilter() {
		public boolean accept(File directory, String fileName) {
			return StringUtils.equals(fileName, indexMetaFile);
		}
	};

	/** Accepts only files whose name equals the current value of {@link #annotatedIndexMetaFile}. */
	private static final FilenameFilter annotatedIndexMetaFilter = new FilenameFilter() {
		public boolean accept(File directory, String fileName) {
			return StringUtils.equals(fileName, annotatedIndexMetaFile);
		}
	};
}