Mercurial > hg > documentViewer
changeset 631:0c3aab828864
remove index meta ns
author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 02 Jul 2015 10:27:05 +0200 |
parents | e36bf3226fde |
children | 4a75a760def2 |
files | documentViewer.py |
diffstat | 1 files changed, 53 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/documentViewer.py Tue May 26 10:58:27 2015 +0200 +++ b/documentViewer.py Thu Jul 02 10:27:05 2015 +0200 @@ -19,6 +19,20 @@ from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl +INDEXMETA_NS="http://md.mpiwg-berlin.mpg.de/ns/indexMeta#" + +def removeINDEXMETA_NS(root): #entfernt den namespace von indexmeta aus dem dom #TODO evertyhing should be changed that it can deal with NS + for elem in root.getiterator(): + print ("ETAG") + print(elem.tag) + if not hasattr(elem.tag, 'find'): continue # (1) + + i = elem.tag.find('{%s}'%INDEXMETA_NS) + if i >= 0: + elem.tag = elem.tag[i+len(('{%s}'%INDEXMETA_NS)):] + + print(elem.tag) + def getMDText(node): """returns the @text content from the MetaDataProvider metadata node""" if isinstance(node, dict): @@ -529,18 +543,52 @@ if mode=="texttool": # url points to document dir or index.meta metaDom = self.metadataService.getDomFromPathOrUrl(url) + removeINDEXMETA_NS(metaDom) + if metaDom is None: raise IOError("Unable to find index.meta for mode=texttool!") docUrl = url.replace('/index.meta', '') if url.startswith('/mpiwg/online/'): docUrl = url.replace('/mpiwg/online/', '', 1) + elif mode=="textpath": + #url points to an textfile + #index.meta optional + #assume index.meta in parent dir + docUrl = getParentPath(url) + docinfo['viewmode'] = "text" + + try: + metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + + removeINDEXMETA_NS(metaDom) + + + + except: + metaDom = None + #metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + #docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) + docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1) + docinfo['textURL'] = url + if docinfo.get("creator", None) is None: + docinfo['creator'] = "" + + if docinfo.get("title", None) is None: + docinfo['title'] = "" + + if docinfo.get("documentPath", None) is None: + docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1) + docinfo['documentPath'] = url.replace('/pages', '', 1) + + docinfo['numPages'] = 1 elif mode=="imagepath": # url points to folder with images, index.meta optional # asssume index.meta in parent dir - docUrl = getParentPath(url) + metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) elif mode=="hocr": @@ -574,12 +622,14 @@ docinfo['documentUrl'] = docUrl # process index.meta contents - if metaDom is not None and metaDom.tag == 'resource': + + if metaDom is not None and (metaDom.tag == 'resource' or metaDom.tag == "{%s}resource"%INDEXMETA_NS): + print("MD") # document directory name and path resource = self.metadataService.getResourceData(dom=metaDom, recursive=1) if resource: docinfo = self.getDocinfoFromResource(docinfo, resource) - + # texttool info texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True) if texttool: