Mercurial > hg > IndexMetaContextualizer
view src/main/java/de/mpiwg/indexmeta/AnnotateIndexMeta.java @ 0:dfce13a5f5f9
init project!
author | Jorge Urzua <jurzua@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 11 Apr 2013 15:25:26 +0200 |
parents | |
children | 8f6c4dab5d17 |
line wrap: on
line source
package de.mpiwg.indexmeta;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.print.attribute.standard.MediaSize.Other;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Adds a {@code context-id} attribute to selected elements of an XML file and
 * writes the annotated document next to the input file.
 */
public class AnnotateIndexMeta {

    /** Input file used by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_FILEPATH =
            "/Users/kthoden/eclipse/workspace/dm2eStuff/data/index.meta";

    /** Name of the attribute that carries the context identifier. */
    private static final String CONTEXT_ID = "context-id";

    /**
     * Command-line entry point.
     *
     * @param argv optional; {@code argv[0]} is the path of the file to annotate.
     *             Without arguments the built-in default path is used.
     */
    public static void main(String[] argv) {
        System.out.println("in main");

        // method call
        String filepath = argv.length > 0 ? argv[0] : DEFAULT_FILEPATH;
        // this is a list of all the elements we want to contextualize
        List<String> contextualizableList = Arrays.asList(
                "author", "editor", "publisher", "city", "holding-library", "keywords");
        xmlParse(filepath, contextualizableList);
        System.out.println("Done");
    }

    /**
     * Parses the XML file given as first argument and writes a {@code context-id}
     * attribute into every element that is to be contextualized.
     *
     * <p>Elements that already carry a {@code context-id} are left untouched.
     * Newly assigned ids are running numbers starting at 0; a number that is
     * already used as a {@code context-id} anywhere in the document is skipped,
     * so ids stay unique when a partially annotated file is processed again.
     *
     * <p>Parser, transformer and I/O failures are reported via
     * {@code printStackTrace()} and not propagated to the caller.
     *
     * @param filepath path to the file. It is also used as the basis for the
     *                 output file name (this adds "-annot").
     * @param contextualizableList names of the elements that shall be given a
     *                 context identifier, which is later used to grab the
     *                 contents and put them into the database to have them
     *                 contextualized.
     */
    public static void xmlParse(String filepath, List<String> contextualizableList) {
        try {
            // this is how the output file will be called
            String outfilepath = filepath + "-annot";

            // Open the file and parse it. Go through File because parse(String)
            // interprets its argument as a URI, whereas filepath is a file path
            // (it is used as one for the output file below).
            DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
            Document doc = docBuilder.parse(new File(filepath));

            // ids already present must never be handed out a second time
            Set<String> usedIds = collectContextIds(doc);

            // iterate through the document
            int count = 0;
            for (String contextElement : contextualizableList) {
                NodeList nodeList = doc.getElementsByTagName(contextElement);
                for (int i = 0; i < nodeList.getLength(); i++) {
                    NamedNodeMap attr = nodeList.item(i).getAttributes();
                    Node existing = attr.getNamedItem(CONTEXT_ID);

                    if (existing == null) {
                        // advance past numbers that are already taken
                        while (usedIds.contains(Integer.toString(count))) {
                            count++;
                        }
                        String id = Integer.toString(count);
                        Attr attribute = doc.createAttribute(CONTEXT_ID);
                        attribute.setValue(id);
                        attr.setNamedItem(attribute);
                        usedIds.add(id);
                    } else {
                        // leave existing annotations alone
                        System.out.println("schon da: " + existing);
                    }

                    // Just for comfort. Print it out.
                    System.out.println(contextElement);
                    count++;
                }
            }

            // write the content into the xml file
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            DOMSource source = new DOMSource(doc);
            StreamResult result = new StreamResult(new File(outfilepath));
            transformer.transform(source, result);
        } catch (ParserConfigurationException pce) {
            pce.printStackTrace();
        } catch (TransformerException tfe) {
            tfe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } catch (SAXException sae) {
            sae.printStackTrace();
        }
    }

    /** Returns the values of all {@code context-id} attributes found on any element of {@code doc}. */
    private static Set<String> collectContextIds(Document doc) {
        Set<String> ids = new HashSet<String>();
        // "*" matches every element in the document
        NodeList allElements = doc.getElementsByTagName("*");
        for (int i = 0; i < allElements.getLength(); i++) {
            Node idAttr = allElements.item(i).getAttributes().getNamedItem(CONTEXT_ID);
            if (idAttr != null) {
                ids.add(idAttr.getNodeValue());
            }
        }
        return ids;
    }
}