Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xml/SchemaHandler.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children | 59ff47d1e237 |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.xml;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;

import javax.xml.namespace.NamespaceContext;

import org.w3c.dom.Node;

import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
import de.mpg.mpiwg.berlin.mpdl.util.Util;
import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;

/**
 * Validates uploaded "echo" and "archimedes" XML documents and extracts their
 * metadata records.
 */
public class SchemaHandler {
  private static final String DOC_BASE_ECHO = "echo";
  private static final String DOC_BASE_ARCHIMEDES = "archimedes";

  /** Prefix/URI pairs served by {@link #getEchoNsContext()}; each URI occurs exactly once. */
  private static final String[][] ECHO_NS_BINDINGS = {
    {"de", "http://www.mpiwg-berlin.mpg.de/ns/de/1.0/"},
    {"echo", "http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/"},
    {"dc", "http://purl.org/dc/elements/1.1/"},
    {"dcterms", "http://purl.org/dc/terms"},
    {"dcq", "http://purl.org/dc/qualifiers/1.0/"},
    {"xhtml", "http://www.w3.org/1999/xhtml"},
    {"dct", "http://purl.org/dc/terms/1.0/"},
    {"xlink", "http://www.w3.org/1999/xlink"},
    {"rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"},
    {"xsi", "http://www.w3.org/2001/XMLSchema-instance"},
    {"mml", "http://www.w3.org/1998/Math/MathML"}
  };

  /**
   * Validates a document file: checks the file name of the operation, runs the
   * RelaxNG validation (echo only), parses the file, checks that its content
   * matches the requested document base and validates the extracted metadata.
   * On success the metadata record is stored in the given operation.
   *
   * @param fileName path of the file to validate
   * @param docOperation operation supplying the document base, file name and destination url
   * @return doc root node of xml file
   * @throws ApplicationException if any validation step fails, or if no metadata record
   *         can be built because the document base is neither "echo" nor "archimedes"
   */
  public Node validate(String fileName, MpdlDocOperation docOperation) throws ApplicationException {
    File destFile = new File(fileName);
    String docBase = docOperation.getDocBase();
    // file name validation
    String fName = docOperation.getFileName();
    if (fName == null || fName.trim().equals(""))
      throw new ApplicationException("Your document file name is empty. Please specify a file name for your document.");
    if (! fName.endsWith(".xml"))
      throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document.");
    // RelaxNG schema validation
    validateByRelaxNGSchema(destFile, docBase);
    // parse validation
    Node docNode = parse(destFile);
    // first simple validations
    validate(docNode, docBase);
    // validate metadata
    MetadataRecord mdRecord = getMetadataRecord(docNode, docOperation);
    // getMetadataRecord yields null for an unknown document base; report that
    // explicitly instead of failing with a NullPointerException below
    if (mdRecord == null)
      throw new ApplicationException("Your destination document base \"" + docBase + "\" is not supported. "
          + "Please specify \"echo\" or \"archimedes\" in your destination document base.");
    validate(mdRecord);
    docOperation.setMdRecord(mdRecord);
    return docNode;
  }

  /**
   * Builds the metadata record of a document according to the document base of
   * the operation.
   *
   * For "echo" documents a missing identifier is derived from the destination
   * url; for "archimedes" documents the identifier is always derived from it.
   *
   * @param documentNode root node of the parsed document
   * @param docOperation operation supplying the document base and destination url
   * @return the metadata record, or null if the document base is neither "echo" nor "archimedes"
   * @throws ApplicationException if reading the metadata fails
   */
  public MetadataRecord getMetadataRecord(Node documentNode, MpdlDocOperation docOperation) throws ApplicationException {
    MetadataRecord mdRecord = null;
    String eXistIdentifier = docOperation.getDestUrl();
    String docBase = docOperation.getDocBase();
    if (DOC_BASE_ECHO.equals(docBase)) {
      mdRecord = getMetadataRecordEcho(documentNode);
      if (mdRecord != null) {
        String identifier = mdRecord.getIdentifier();
        if (identifier == null) {
          String id = getIdByExistId(eXistIdentifier);
          mdRecord.setIdentifier("ECHO:" + id + ".xml");
        }
      }
    } else if (DOC_BASE_ARCHIMEDES.equals(docBase)) {
      mdRecord = getMetadataRecordArchimedes(documentNode);
      if (mdRecord != null) {
        String id = getIdByExistId(eXistIdentifier);
        mdRecord.setIdentifier("ARCHIMEDES:" + id + ".xml");
      }
    }
    if (mdRecord != null) {
      mdRecord.setEXistIdentifier(eXistIdentifier);
      mdRecord.setMediaType("fulltext");
    }
    return mdRecord;
  }

  /**
   * Delivers the page break file names of a document.
   *
   * For "echo" documents these are the values of the <code>file</code>
   * attributes of all <code>echo:pb</code> elements. For "archimedes"
   * documents the running page numbers ("1", "2", ...) are delivered, one per
   * <code>pb</code> element.
   *
   * @param documentNode root node of the parsed document
   * @param docBase document base ("echo" or "archimedes")
   * @return the list of names; null if the document base is unknown or if the
   *         archimedes page break query itself delivers null
   * @throws ApplicationException if the XPath evaluation fails
   */
  public ArrayList<String> getPBFileNames(Node documentNode, String docBase) throws ApplicationException {
    ArrayList<String> pbFileNamesArrayStr = null;
    if (DOC_BASE_ECHO.equals(docBase)) {
      XmlUtil xmlUtil = XmlUtil.getInstance();
      NamespaceContext nsContext = getEchoNsContext();
      pbFileNamesArrayStr = xmlUtil.evaluateToStringArray(documentNode, "//echo:pb/@file", nsContext);
    } else if (DOC_BASE_ARCHIMEDES.equals(docBase)) {
      XmlUtil xmlUtil = XmlUtil.getInstance();
      ArrayList<String> pbsStrArray = xmlUtil.evaluateToStringArray(documentNode, "//pb", null);
      if (pbsStrArray != null) {
        pbFileNamesArrayStr = new ArrayList<String>();
        int countPBs = pbsStrArray.size();
        for (int i = 1; i <= countPBs; i++) {
          pbFileNamesArrayStr.add("" + i);  // the 1-based page number serves as name of each page break
        }
      }
    }
    return pbFileNamesArrayStr;
  }

  /**
   * Determines the page image directory of a document.
   *
   * The base directory is "/permanent/library/" + id, or
   * "/permanent/archimedes/" + id for archimedes records; an echo directory set
   * in the record overrides both. The sub directory is taken from the
   * index.meta lookup and defaults to "pageimg".
   *
   * @param mdRecord metadata record of the document
   * @return base directory + "/" + sub directory
   * @throws ApplicationException declared for callers; see the helpers used
   */
  public String getPageImgDir(MetadataRecord mdRecord) throws ApplicationException {
    String dcId = mdRecord.getIdentifier();  // dublin core identifier: is used to find the digilib image directory for this document
    String id = getIdByDCIdentifier(dcId);
    String imagesDocDirectory = "/permanent/library/" + id;
    if (mdRecord.hasArchimedesDocBase())
      imagesDocDirectory = "/permanent/archimedes/" + id;
    String echoDir = mdRecord.getEchoDir();
    if (echoDir != null)
      imagesDocDirectory = echoDir;
    String pageImgSubDir = "pageimg";  // default name: if digilib does not answer then this name is used
    String indexMetaPageImgDir = getIndexMetaDataPageImg(imagesDocDirectory);
    if (indexMetaPageImgDir != null)
      pageImgSubDir = indexMetaPageImgDir;
    String pageImgDir = imagesDocDirectory + "/" + pageImgSubDir;
    return pageImgDir;
  }

  /**
   * Parses the given file.
   *
   * @param file file to parse
   * @return the node delivered by the XML utility for that file
   * @throws ApplicationException with a user oriented message if parsing fails
   */
  private Node parse(File file) throws ApplicationException {
    XmlUtil xmlUtil = XmlUtil.getInstance();
    Node retNode = null;
    try {
      retNode = xmlUtil.doc(file);
    } catch (ApplicationException e) {
      throw new ApplicationException("Your source file is not valid: " + e.getMessage());
    }
    return retNode;
  }

  /**
   * Checks that the content of the document does not contradict the requested
   * document base: an "echo" destination must not receive a document with
   * archimedes metadata and vice versa.
   *
   * @param docNode root node of the parsed document
   * @param docBase requested document base; may be null (then nothing is rejected here)
   * @throws ApplicationException if the evaluation fails or the content contradicts the document base
   */
  private void validate(Node docNode, String docBase) throws ApplicationException {
    XmlUtil xmlUtil = XmlUtil.getInstance();
    NamespaceContext nsContext = getEchoNsContext();
    String echoTest = null;
    String archimedesTest = null;
    try {
      echoTest = xmlUtil.evaluateToString(docNode, "/echo:echo/echo:metadata", nsContext);
      archimedesTest = xmlUtil.evaluateToString(docNode, "/archimedes/info", null);
    } catch (ApplicationException e) {
      throw new ApplicationException("Your source file is not an \"echo\" or \"archimedes\" file. Please proof that file.");
    }
    // constant-first equals: a null docBase must not raise a NullPointerException
    if (DOC_BASE_ECHO.equals(docBase) && archimedesTest != null)
      throw new ApplicationException("Your source file is an \"archimedes\" file. " + "Please specify \"archimedes\" in your destination document base.");
    if (DOC_BASE_ARCHIMEDES.equals(docBase) && echoTest != null)
      throw new ApplicationException("Your source file is an \"echo\" file. " + "Please specify \"echo\" in your destination document base.");
  }

  /**
   * Validates the file against the echo RelaxNG schema. Only documents of the
   * "echo" document base are validated; all others pass unchecked.
   *
   * @param destFile file to validate
   * @param docBase requested document base; may be null
   * @throws ApplicationException if the schema url is malformed or the validation fails
   */
  private void validateByRelaxNGSchema(File destFile, String docBase) throws ApplicationException {
    XmlUtil xmlUtil = XmlUtil.getInstance();
    if (DOC_BASE_ECHO.equals(docBase)) {
      URL echoSchemaUrl = getEchoRelaxNGSchemaUrl();
      xmlUtil.validateByRelaxNG(destFile, echoSchemaUrl);
    }
  }

  /**
   * Builds the url of the echo RelaxNG schema from the configured eXist host
   * name, port and schema path.
   *
   * @return the schema url
   * @throws ApplicationException if the configured values do not form a valid url
   */
  private URL getEchoRelaxNGSchemaUrl() throws ApplicationException {
    String echoSchemaUrlStr = "http://" + MpdlConstants.MPDL_EXIST_HOST_NAME + ":" + MpdlConstants.MPDL_EXIST_PORT + MpdlConstants.MPDL_ECHO_RELAXNG_PATH;
    URL echoSchemaUrl = null;
    try {
      echoSchemaUrl = new URL(echoSchemaUrlStr);
    } catch (MalformedURLException e) {
      throw new ApplicationException(e);
    }
    return echoSchemaUrl;
  }

  /**
   * Checks that the mandatory metadata fields identifier, creator and title
   * are present and not blank.
   *
   * @param mdRecord record to check; must not be null
   * @throws ApplicationException naming the first missing field
   */
  private void validate(MetadataRecord mdRecord) throws ApplicationException {
    String identifier = mdRecord.getIdentifier();
    String creator = mdRecord.getCreator();
    String title = mdRecord.getTitle();
    if (identifier == null || identifier.trim().equals(""))
      throw new ApplicationException("Your document file does not contain the metadata field: " + "identifier");
    if (creator == null || creator.trim().equals(""))
      throw new ApplicationException("Your document file does not contain the metadata field: " + "creator");
    if (title == null || title.trim().equals(""))
      throw new ApplicationException("Your document file does not contain the metadata field: " + "title");
  }

  /**
   * Reads the metadata record of an "echo" document from
   * <code>/echo:echo/echo:metadata</code>.
   *
   * @param documentNode root node of the parsed document
   * @return the record (never null); fields without a value in the document stay null
   * @throws ApplicationException if an XPath evaluation fails
   */
  private MetadataRecord getMetadataRecordEcho(Node documentNode) throws ApplicationException {
    XmlUtil xmlUtil = XmlUtil.getInstance();
    NamespaceContext nsContext = getEchoNsContext();
    String metadataPath = "/echo:echo/echo:metadata/";
    String identifier = evaluateText(xmlUtil, documentNode, metadataPath + "dcterms:identifier", nsContext);
    String creator = evaluateText(xmlUtil, documentNode, metadataPath + "dcterms:creator", nsContext);
    String title = evaluateText(xmlUtil, documentNode, metadataPath + "dcterms:title", nsContext);
    String language = evaluateText(xmlUtil, documentNode, metadataPath + "dcterms:language", nsContext);
    Date date = parseYear(xmlUtil.evaluateToString(documentNode, metadataPath + "dcterms:date", nsContext));
    String rights = evaluateText(xmlUtil, documentNode, metadataPath + "dcterms:rights", nsContext);
    String license = evaluateText(xmlUtil, documentNode, metadataPath + "dcterms:license", nsContext);
    String accessRights = evaluateText(xmlUtil, documentNode, metadataPath + "dcterms:accessRights", nsContext);
    String echoDir = evaluateText(xmlUtil, documentNode, metadataPath + "echo:echodir", nsContext);
    String echoLink = evaluateText(xmlUtil, documentNode, metadataPath + "echo:echolink", nsContext);
    MetadataRecord mdRecord = new MetadataRecord(identifier, language, creator, title, null, null, "text/xml", rights, date);
    mdRecord.setDocBase("echo");
    mdRecord.setLicense(license);
    mdRecord.setAccessRights(accessRights);
    mdRecord.setEchoLink(echoLink);
    mdRecord.setEchoDir(echoDir);
    return mdRecord;
  }

  /**
   * Reads the metadata record of an "archimedes" document from
   * <code>/archimedes/info</code>. Rights, license and access rights are fixed
   * values for this document base.
   *
   * @param documentNode root node of the parsed document
   * @return the record (never null); fields without a value in the document stay null
   * @throws ApplicationException if an XPath evaluation fails
   */
  private MetadataRecord getMetadataRecordArchimedes(Node documentNode) throws ApplicationException {
    XmlUtil xmlUtil = XmlUtil.getInstance();
    String identifier = evaluateText(xmlUtil, documentNode, "/archimedes/info/cvs_file", null);
    String creator = evaluateText(xmlUtil, documentNode, "/archimedes/info/author", null);
    String title = evaluateText(xmlUtil, documentNode, "/archimedes/info/title", null);
    String language = evaluateText(xmlUtil, documentNode, "/archimedes/info/lang", null);
    Date date = parseYear(xmlUtil.evaluateToString(documentNode, "/archimedes/info/date", null));
    String rights = "open access";
    String license = "http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration";
    String accessRights = "free";
    MetadataRecord mdRecord = new MetadataRecord(identifier, language, creator, title, null, null, "text/xml", rights, date);
    mdRecord.setDocBase("archimedes");
    mdRecord.setLicense(license);
    mdRecord.setAccessRights(accessRights);
    return mdRecord;
  }

  /**
   * Evaluates an XPath expression to a string and passes a non-null result
   * through StringUtilEscapeChars.deresolveXmlEntities.
   *
   * @param xmlUtil XML utility used for the evaluation
   * @param documentNode node the expression is evaluated against
   * @param xpath XPath expression
   * @param nsContext namespace context of the expression; may be null
   * @return the converted value, or null if the evaluation delivers null
   * @throws ApplicationException if the evaluation fails
   */
  private String evaluateText(XmlUtil xmlUtil, Node documentNode, String xpath, NamespaceContext nsContext) throws ApplicationException {
    String value = xmlUtil.evaluateToString(documentNode, xpath, nsContext);
    if (value != null)
      value = StringUtilEscapeChars.deresolveXmlEntities(value);
    return value;
  }

  /**
   * Converts the raw date value of a document into a date at the first of
   * January of its year.
   *
   * @param rawYear raw date value as evaluated from the document; may be null or empty
   * @return the date, or null if the value is null, empty or yields no year string
   * @throws ApplicationException declared for the conversion helpers used
   */
  private Date parseYear(String rawYear) throws ApplicationException {
    if (rawYear == null || rawYear.equals(""))
      return null;
    String yearStr = StringUtilEscapeChars.deresolveXmlEntities(rawYear);
    yearStr = new Util().toYearStr(yearStr);  // test if possible etc
    if (yearStr == null)
      return null;
    return XmlUtil.getInstance().toDate(yearStr + "-01-01T00:00:00.000Z");
  }

  /**
   * Asks the digilib Texter servlet for the index.meta file of the given
   * directory and reads the page image sub directory
   * (<code>/resource/meta/texttool/image</code>) from it.
   *
   * @param imagesDocDirectory image directory of the document
   * @return the sub directory name, or null if it is not available
   * @throws ApplicationException declared for callers; lookup failures are not propagated
   */
  private String getIndexMetaDataPageImg(String imagesDocDirectory) throws ApplicationException {
    String nausikaaURLTexter = "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter";
    XmlUtil xmlUtil = XmlUtil.getInstance();
    String pageImageDirectory = null;
    try {
      Node imagesDocDirectoryIndexMetaNode = xmlUtil.doc(nausikaaURLTexter + "?fn=" + imagesDocDirectory + "/index.meta");
      pageImageDirectory = xmlUtil.evaluateToString(imagesDocDirectoryIndexMetaNode, "/resource/meta/texttool/image", null);
    } catch (Exception ignored) {
      // deliberate best effort: return null if digilib does not work, the
      // caller then falls back to its default sub directory
    }
    return pageImageDirectory;
  }

  /**
   * Extracts the document id from an eXist identifier: the text between the
   * second path delimiter (the first one is searched from index 2 on) and the
   * following ".xml". For an input such as "/echo/la/Foo.xml" this is "Foo".
   *
   * @param eXistIdentifier eXist identifier; may be null
   * @return the id; the unchanged identifier if it does not have that form; null for null
   */
  private String getIdByExistId(String eXistIdentifier) {
    String id = null;
    if (eXistIdentifier == null)
      return null;
    int firstDelimPos = eXistIdentifier.indexOf("/", 2);
    int secondDelimPos = eXistIdentifier.indexOf("/", firstDelimPos + 1);
    int thirdDelimPos = eXistIdentifier.indexOf(".xml", secondDelimPos + 1);
    if (firstDelimPos == -1 || secondDelimPos == -1 || thirdDelimPos == -1)
      id = eXistIdentifier;
    else
      id = eXistIdentifier.substring(secondDelimPos + 1, thirdDelimPos);
    return id;
  }

  /**
   * Strips the "ECHO:" / "ARCHIMEDES:" prefix and the ".xml" suffix from a
   * dublin core identifier.
   *
   * @param dcIdentifier dublin core identifier; may be null
   * @return the bare id, or null if the identifier is null or blank
   */
  private String getIdByDCIdentifier(String dcIdentifier) {
    if (dcIdentifier == null || dcIdentifier.trim().equals(""))
      return null;
    // if dcIdentifier starts with "ECHO:" or "ARCHIMEDES:" then delete it
    if (dcIdentifier.startsWith("ECHO:"))
      dcIdentifier = dcIdentifier.substring(5);
    if (dcIdentifier.startsWith("ARCHIMEDES:"))
      dcIdentifier = dcIdentifier.substring(11);
    // delete the .xml suffix if there is one
    if (dcIdentifier.endsWith(".xml")) {
      int size = dcIdentifier.length();
      dcIdentifier = dcIdentifier.substring(0, size - 4);
    }
    return dcIdentifier;
  }

  /**
   * Delivers the namespace context used for XPath expressions on echo
   * documents. It knows the prefixes de, echo, dc, dcterms, dcq, xhtml, dct,
   * xlink, rdf, xsi and mml.
   *
   * Unknown prefixes and unknown namespace URIs resolve to null. Null
   * arguments are rejected with an IllegalArgumentException, as the
   * NamespaceContext contract demands.
   *
   * @return a new namespace context
   */
  public NamespaceContext getEchoNsContext() {
    NamespaceContext nsContext = new NamespaceContext() {
      public String getNamespaceURI(String prefix) {
        if (prefix == null)
          throw new IllegalArgumentException("prefix must not be null");
        for (int i = 0; i < ECHO_NS_BINDINGS.length; i++) {
          if (ECHO_NS_BINDINGS[i][0].equals(prefix))
            return ECHO_NS_BINDINGS[i][1];
        }
        return null;
      }

      public String getPrefix(String uri) {
        if (uri == null)
          throw new IllegalArgumentException("namespace URI must not be null");
        for (int i = 0; i < ECHO_NS_BINDINGS.length; i++) {
          if (ECHO_NS_BINDINGS[i][1].equals(uri))
            return ECHO_NS_BINDINGS[i][0];
        }
        return null;
      }

      public Iterator<String> getPrefixes(String namespace) {
        // every URI is bound to at most one prefix, so the result holds zero or one entry
        ArrayList<String> prefixes = new ArrayList<String>();
        String prefix = getPrefix(namespace);
        if (prefix != null)
          prefixes.add(prefix);
        return prefixes.iterator();
      }
    };
    return nsContext;
  }
}