annotate src/main/java/de/mpiwg/indexmeta/IndexMetaParser.java @ 0:dfce13a5f5f9

nit project!
author Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
date Thu, 11 Apr 2013 15:25:26 +0200
parents
children 7d231e4e86e5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
1 package de.mpiwg.indexmeta;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
2
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
3 import java.io.File;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
4 import java.util.ArrayList;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
5 import java.util.List;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
6
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
7 import javax.xml.parsers.DocumentBuilder;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
8 import javax.xml.parsers.DocumentBuilderFactory;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
9 import javax.xml.xpath.XPath;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
10 import javax.xml.xpath.XPathConstants;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
11 import javax.xml.xpath.XPathExpression;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
12 import javax.xml.xpath.XPathFactory;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
13
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
14 import org.w3c.dom.Document;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
15 import org.w3c.dom.Element;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
16 import org.w3c.dom.Node;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
17 import org.w3c.dom.NodeList;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
18
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
19 import de.mpiwg.indexmeta.bo.Contextualization;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
20
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
21 public class IndexMetaParser {
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
22
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
23
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
24 public static List<Contextualization> getCtxItems(String filePath){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
25 List<Contextualization> rs = new ArrayList<Contextualization>();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
26
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
27 try {
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
28 File file = new File("/Users/jurzua/Projects/workspace/contextualization/data/index.meta/01index.meta.anno.xml");
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
29
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
30 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
31 DocumentBuilder builder = factory.newDocumentBuilder();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
32 Document doc = builder.parse(file);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
33
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
34 List<Node> nodes = getNodeList(doc, Contextualization.AUTHOR);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
35 for(Node node : nodes){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
36 Contextualization ctx = getCtx(node, "XX");
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
37 if(ctx != null){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
38 rs.add(ctx);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
39 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
40 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
41
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
42 nodes = getNodeList(doc, Contextualization.CITY);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
43 for(Node node : nodes){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
44 Contextualization ctx = getCtx(node, "XX");
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
45 if(ctx != null){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
46 rs.add(ctx);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
47 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
48 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
49
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
50 for(Contextualization ctx : rs){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
51 System.out.println(ctx.toString());
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
52 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
53
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
54 } catch (Exception e) {
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
55 e.printStackTrace();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
56 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
57
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
58 return rs;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
59 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
60
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
61 public static List<Node> getNodeList(Document doc, String tagName){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
62 List<Node> rs = new ArrayList<Node>();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
63
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
64 try {
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
65 XPathFactory xPathfactory = XPathFactory.newInstance();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
66 XPath xpath = xPathfactory.newXPath();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
67 XPathExpression expr = xpath.compile("//" + tagName);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
68
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
69 Object result = expr.evaluate(doc, XPathConstants.NODESET);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
70
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
71 NodeList nodes = (NodeList) result;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
72 for (int i = 0; i < nodes.getLength(); i++) {
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
73 Node node = nodes.item(i);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
74 rs.add(node);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
75 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
76 } catch (Exception e) {
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
77 e.printStackTrace();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
78 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
79
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
80 return rs;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
81
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
82 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
83
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
84 public static Contextualization getCtx(Node node, String indexMetaId){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
85 if(node instanceof Element){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
86 try{
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
87 Element elem = (Element)node;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
88 Contextualization ctx = new Contextualization();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
89 ctx.setIndexMetaId(indexMetaId);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
90 ctx.setType(elem.getNodeName());
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
91 ctx.setElementId(elem.getAttribute("context-id"));
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
92
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
93 Node child = elem.getFirstChild();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
94 if(child != null){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
95 ctx.setContent(child.getNodeValue());
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
96 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
97 return ctx;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
98 }catch (Exception e) {
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
99 e.printStackTrace();
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
100 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
101 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
102
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
103
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
104 return null;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
105 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
106
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
107 public static void main(String[] args){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
108
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
109 getCtxItems("/Users/jurzua/Projects/workspace/contextualization/data/index.meta/01index.meta");
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
110 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
111
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
112 public static void printXpathResult(Object result){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
113 NodeList nodes = (NodeList) result;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
114 for (int i = 0; i < nodes.getLength(); i++) {
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
115 Node node = nodes.item(i);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
116 if(node instanceof Element){
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
117 Element e = (Element)node;
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
118
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
119 System.out.println("Local Name= " + node.getLocalName());
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
120 System.out.println("Value= " + node.getNodeValue());
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
121 System.out.println("Name= " + node.getNodeName());
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
122 System.out.println("getFirstChild value= " + node.getFirstChild().getNodeValue());
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
123 System.out.println(node);
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
124 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
125 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
126 }
dfce13a5f5f9 nit project!
Jorge Urzua <jurzua@mpiwg-berlin.mpg.de>
parents:
diff changeset
127 }