Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocManager.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.doc; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; import java.util.Date; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import com.sun.org.apache.xerces.internal.parsers.SAXParser; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants; import de.mpg.mpiwg.berlin.mpdl.util.Util; import de.mpg.mpiwg.berlin.mpdl.util.FilenameFilterExtension; public class ArchimedesDocManager { private static ArchimedesDocManager instance; private static String MPDL_DOC_DIR = MpdlConstants.MPDL_DOC_DIR; private static String ARCH_DOC_DIR = MPDL_DOC_DIR + "/documents/archimedes"; private static String ARCH_DOC_OUT_DIR = MPDL_DOC_DIR + "/documentsTranscodedToUnicode/archimedes"; private ArchimedesDocContentHandler archimedesDocContentHandler; private ArchimedesDocForeignLangContentHandler archimedesDocForeignLangContentHandler; private Date beginOfOperation; private Date endOfOperation; public static ArchimedesDocManager getInstance() throws ApplicationException { if (instance == null) { instance = new ArchimedesDocManager(); } return instance; } /** * */ public static void main(String[] args) throws ApplicationException { getInstance(); instance.beginOperation(); System.out.print("Start ..."); // Greek String inputDirGreek = ARCH_DOC_DIR + "/el"; String outputDirGreek = ARCH_DOC_OUT_DIR + "/el"; // instance.transcodeDirectory("el", "betacode", "unicode", inputDirGreek, outputDirGreek); // Arabic String inputDirArabic = ARCH_DOC_DIR + "/ar"; String outputDirArabic = ARCH_DOC_OUT_DIR + "/ar"; // instance.transcodeDirectory("ar", "buckwalter", "unicode", inputDirArabic, outputDirArabic); // Foreign lang=greek transcoding instance.transcodeForeignLangFiles(); instance.end(); instance.endOperation(); Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation); System.out.println("End."); System.out.println("Needed time: " + elapsedTime + " seconds"); } private void transcodeDirectory(String language, String fromEncoding, String toEncoding, String inputDirName, String outputDirName) throws ApplicationException { File inputDir = new File(inputDirName); FilenameFilter filter = new FilenameFilterExtension("xml"); File[] files = inputDir.listFiles(filter); for (int i=0; i < files.length; i++) { File inputFile = files[i]; String outputFileName = inputFile.getName(); File outputFile = new File(outputDirName + "/" + outputFileName); File outputDir = new File(outputFile.getParent()); if (! outputDir.exists()) { outputDir.mkdirs(); // create the directory including parent directories which do not exist } transcodeFile(language, fromEncoding, toEncoding, inputFile, outputFile); } } private void transcodeFile(String language, String fromEncoding, String toEncoding, File inputFile, File outputFile) throws ApplicationException { archimedesDocContentHandler = new ArchimedesDocContentHandler(language, fromEncoding, toEncoding, outputFile); try { XMLReader xmlParser = new SAXParser(); xmlParser.setContentHandler(archimedesDocContentHandler); InputStream inputStream = new FileInputStream(inputFile); BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream); InputSource input = new InputSource(bufferedInputStream); xmlParser.parse(input); bufferedInputStream.close(); } catch (SAXException e) { throw new ApplicationException(e); } catch (IOException e) { throw new ApplicationException(e); } } private void transcodeForeignLangFiles() throws ApplicationException { String[] languages = {"en", "fr", "it", "la"}; for (int i=0; i<languages.length; i++) { String language = languages[i]; File inputDir = new File(ARCH_DOC_DIR + "/" + language); String outputDirName = ARCH_DOC_OUT_DIR + "/" + language; FilenameFilter filter = new FilenameFilterExtension("xml"); File[] files = inputDir.listFiles(filter); for (int j=0; j < files.length; j++) { File inputFile = files[j]; String outputFileName = inputFile.getName(); File outputFile = new File(outputDirName + "/" + outputFileName); File outputDir = new File(outputFile.getParent()); if (! outputDir.exists()) { outputDir.mkdirs(); // create the directory including parent directories which do not exist } transcodeForeignLangFile(inputFile, outputFile); } } } private void transcodeForeignLangFile(File inputFile, File outputFile) throws ApplicationException { archimedesDocForeignLangContentHandler = new ArchimedesDocForeignLangContentHandler(outputFile); try { XMLReader xmlParser = new SAXParser(); xmlParser.setContentHandler(archimedesDocForeignLangContentHandler); InputStream inputStream = new FileInputStream(inputFile); BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream); InputSource input = new InputSource(bufferedInputStream); xmlParser.parse(input); bufferedInputStream.close(); } catch (SAXException e) { throw new ApplicationException(e); } catch (IOException e) { throw new ApplicationException(e); } } private void end() throws ApplicationException { } private void beginOperation() { beginOfOperation = new Date(); } private void endOperation() { endOfOperation = new Date(); } }