0
|
1 package de.mpiwg.indexmeta;
|
|
2 // import stuff
|
|
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.print.attribute.standard.MediaSize.Other;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
|
|
24
|
|
/**
 * Command-line tool that tags selected elements of an XML metadata file with a
 * {@code context-id} attribute and writes the annotated document to a second file
 * whose name is the input path with {@code "-annot"} appended.
 */
public class AnnotateIndexMeta {

    /** Input file used by {@link #main} when no path is passed on the command line. */
    private static final String DEFAULT_FILEPATH =
            "/Users/kthoden/eclipse/workspace/dm2eStuff/data/index.meta";

    /** Name of the attribute that carries the context identifier. */
    private static final String CONTEXT_ID = "context-id";

    /**
     * Annotates one file with the default set of contextualizable elements.
     *
     * @param argv optional; {@code argv[0]} is the path of the file to annotate. When it is
     *             missing, the built-in default path is used.
     */
    public static void main(String[] argv) {
        System.out.println("in main");

        String filepath = (argv != null && argv.length > 0) ? argv[0] : DEFAULT_FILEPATH;
        // this is a list of all the elements we want to contextualize
        List<String> contextualizableList = Arrays.asList(
                "author", "editor", "publisher", "city", "holding-library", "keywords");
        xmlParse(filepath, contextualizableList);
        System.out.println("Done");
    }

    /**
     * Parses the XML file given as first argument and writes a {@code context-id} attribute
     * into every element that is to be contextualized. Elements that already carry a
     * {@code context-id} are left untouched, and newly assigned values never repeat a value
     * that is already present anywhere in the document.
     *
     * <p>Parsing, transformation and I/O failures are reported on standard error and are not
     * propagated to the caller.
     *
     * @param filepath path to the file. It is also used as the basis for the output file
     *                 (this adds "-annot").
     * @param contextualizableList names of the elements that shall be given a context
     *                 identifier.
     * @throws IllegalArgumentException if {@code filepath} is {@code null}
     */
    public static void xmlParse(String filepath, List<String> contextualizableList) {
        if (filepath == null) {
            throw new IllegalArgumentException("filepath must not be null");
        }
        // this is how the output file will be called
        String outfilepath = filepath + "-annot";
        try {
            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            // parse(String) would interpret the argument as a URI, so characters such as
            // '#' or '%' in a path would be misread; parse(File) treats it as a plain path.
            Document doc = docBuilder.parse(new File(filepath));

            annotate(doc, contextualizableList);
            writeDocument(doc, outfilepath);
        } catch (ParserConfigurationException pce) {
            reportFailure(filepath, pce);
        } catch (TransformerException tfe) {
            reportFailure(filepath, tfe);
        } catch (IOException ioe) {
            reportFailure(filepath, ioe);
        } catch (SAXException sae) {
            reportFailure(filepath, sae);
        }
    }

    /**
     * Adds a {@code context-id} to every listed element that does not have one yet.
     * The counter advances once per visited element, so a document without any existing
     * identifiers is numbered 0, 1, 2, ... in visiting order.
     */
    private static void annotate(Document doc, List<String> contextualizableList) {
        Set<String> usedIds = collectExistingIds(doc);
        int count = 0;
        for (String contextElement : contextualizableList) {
            NodeList nodeList = doc.getElementsByTagName(contextElement);
            for (int i = 0; i < nodeList.getLength(); i++) {
                Element element = (Element) nodeList.item(i);
                if (element.hasAttribute(CONTEXT_ID)) {
                    // leave existing annotations alone
                    System.out.println("schon da: " + element.getAttributeNode(CONTEXT_ID));
                } else {
                    // skip values that are already taken so identifiers stay unique
                    while (usedIds.contains(Integer.toString(count))) {
                        count++;
                    }
                    String id = Integer.toString(count);
                    element.setAttribute(CONTEXT_ID, id);
                    usedIds.add(id);
                }
                // Just for comfort. Print it out.
                System.out.println(contextElement);
                count++;
            }
        }
    }

    /** Collects the {@code context-id} values already present on any element of the document. */
    private static Set<String> collectExistingIds(Document doc) {
        Set<String> ids = new HashSet<String>();
        NodeList allElements = doc.getElementsByTagName("*");
        for (int i = 0; i < allElements.getLength(); i++) {
            Element element = (Element) allElements.item(i);
            if (element.hasAttribute(CONTEXT_ID)) {
                ids.add(element.getAttribute(CONTEXT_ID));
            }
        }
        return ids;
    }

    /** Serializes the document to the given path and always closes the output stream. */
    private static void writeDocument(Document doc, String outfilepath)
            throws TransformerException, IOException {
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        OutputStream out = new FileOutputStream(outfilepath);
        try {
            transformer.transform(new DOMSource(doc), new StreamResult(out));
        } finally {
            out.close();
        }
    }

    /** Reports a failure on standard error, naming the file that was being processed. */
    private static void reportFailure(String filepath, Exception cause) {
        System.err.println("Could not annotate " + filepath + ": " + cause);
        cause.printStackTrace();
    }
}