Mercurial > hg > IndexMetaContextualizer
view src/main/java/de/mpiwg/indexmeta/IndexMetaParser.java @ 8:9ce7979fd037
Implementation Data Provider
author | Jorge Urzua <jurzua@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Apr 2013 10:34:23 +0200 |
parents | 7d231e4e86e5 |
children |
line wrap: on
line source
package de.mpiwg.indexmeta; import java.io.File; import java.io.FilenameFilter; import java.util.ArrayList; import java.util.List; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathFactory; import org.apache.commons.lang.StringUtils; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import de.mpiwg.indexmeta.bo.Contextualization; public class IndexMetaParser { public static String indexMetaFile = "index.meta"; public static String annotatedIndexMetaFile = "index.meta.annot"; /* public static List<Contextualization> getCtxItems(String filePath){ List<Contextualization> rs = new ArrayList<Contextualization>(); try { File file = new File("/Users/jurzua/Projects/workspace/contextualization/data/index.meta/01index.meta.anno.xml"); if(file.isFile()){ rs = getCtxItemsFromFile(file, rs); }else if(file.isDirectory()){ rs = getCtxItemsFromDirectory(file, rs); } } catch (Exception e) { e.printStackTrace(); } return rs; } */ public static List<Contextualization> getCtxFromDirectory(String directory){ List<Contextualization> rs = new ArrayList<Contextualization>(); try { File file = new File(directory); rs = getCtxItemsFromDirectory(file, rs); } catch (Exception e) { e.printStackTrace(); } return rs; } public static List<Contextualization> getCtxFromResource(String indexMetaResource){ List<Contextualization> rs = new ArrayList<Contextualization>(); try { File file = new File(indexMetaResource); rs = getCtxItemsFromIndexMetaResource(file, rs); } catch (Exception e) { e.printStackTrace(); } return rs; } private static List<Contextualization> getCtxItemsFromDirectory(File directory, List<Contextualization> rs) throws Exception{ String canonicalPath = directory.getCanonicalPath(); System.out.println(canonicalPath); File[] files = directory.listFiles(); for(File indexMetaResource : files){ if(indexMetaResource.isDirectory()){ rs = getCtxItemsFromIndexMetaResource(indexMetaResource, rs); } } return rs; } private static List<Contextualization> getCtxItemsFromIndexMetaResource(File indexMetaResource, List<Contextualization> rs){ System.out.print("Working on: " + indexMetaResource.getName()); File[] list0 = indexMetaResource.listFiles(indexMetaFilter); File annotatedFile = null; if(list0.length != 0){ System.out.print("\tFound: " + indexMetaFile); File[] list1 = indexMetaResource.listFiles(annotatedIndexMetaFilter); if(list1.length == 0){ //create annotated file System.out.print("\tAnnotated no found"); try{ annotatedFile = AnnotateIndexMeta.xmlParse(list0[0].getAbsolutePath(), null); }catch (Exception e) { e.printStackTrace(); } }else{ annotatedFile = list1[0]; } } rs = getCtxItemsFromFile(annotatedFile, rs, indexMetaResource.getName()); System.out.println(); return rs; } private static List<Contextualization> getCtxItemsFromFile(File file, List<Contextualization> rs, String indexMetaId){ try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document doc = builder.parse(file); for(String ctxElement : Contextualization.contextualizableList){} List<Node> nodes = getNodeList(doc, Contextualization.AUTHOR); for(Node node : nodes){ Contextualization ctx = getCtx(node, indexMetaId); if(ctx != null){ rs.add(ctx); } } nodes = getNodeList(doc, Contextualization.CITY); for(Node node : nodes){ Contextualization ctx = getCtx(node, indexMetaId); if(ctx != null){ rs.add(ctx); } } } catch (Exception e) { e.printStackTrace(); } return rs; } public static List<Node> getNodeList(Document doc, String tagName){ List<Node> rs = new ArrayList<Node>(); try { XPathFactory xPathfactory = XPathFactory.newInstance(); XPath xpath = xPathfactory.newXPath(); XPathExpression expr = xpath.compile("//" + tagName); Object result = expr.evaluate(doc, XPathConstants.NODESET); NodeList nodes = (NodeList) result; for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); rs.add(node); } } catch (Exception e) { e.printStackTrace(); } return rs; } public static Contextualization getCtx(Node node, String indexMetaId){ if(node instanceof Element){ try{ Element elem = (Element)node; Contextualization ctx = new Contextualization(); ctx.setIndexMetaId(indexMetaId); ctx.setType(elem.getNodeName()); ctx.setElementId(elem.getAttribute("context-id")); Node child = elem.getFirstChild(); if(child != null){ ctx.setContent(child.getNodeValue()); } return ctx; }catch (Exception e) { e.printStackTrace(); } } return null; } public static void main(String[] args){ //--/Volumes/online_permanent/library //getCtxItems("/Users/jurzua/Projects/workspace/contextualization/data/index.meta/01index.meta"); List<Contextualization> rs = getCtxFromDirectory("/Users/jurzua/Projects/max-planck/index_meta/library"); //List<Contextualization> rs = getCtxFromResource("/Users/jurzua/Projects/max-planck/index_meta/library/BB1RH90M"); for(Contextualization ctx : rs){ System.out.println(ctx.toString()); } } public static void printXpathResult(Object result){ NodeList nodes = (NodeList) result; for (int i = 0; i < nodes.getLength(); i++) { Node node = nodes.item(i); if(node instanceof Element){ Element e = (Element)node; System.out.println("Local Name= " + node.getLocalName()); System.out.println("Value= " + node.getNodeValue()); System.out.println("Name= " + node.getNodeName()); System.out.println("getFirstChild value= " + node.getFirstChild().getNodeValue()); System.out.println(node); } } } private static FilenameFilter indexMetaFilter = new FilenameFilter() { public boolean accept(File directory, String fileName) { return StringUtils.equals(fileName, indexMetaFile); } }; private static FilenameFilter annotatedIndexMetaFilter = new FilenameFilter() { public boolean accept(File directory, String fileName) { return StringUtils.equals(fileName, annotatedIndexMetaFile); } }; }