# HG changeset patch # User Dirk Wintergruen # Date 1435825625 -7200 # Node ID 0c3aab82886445ef4506640f0fddc27cef2af659 # Parent e36bf3226fdea2908c3d3ba02b6c55318fbc063e remove index meta ns diff -r e36bf3226fde -r 0c3aab828864 documentViewer.py --- a/documentViewer.py Tue May 26 10:58:27 2015 +0200 +++ b/documentViewer.py Thu Jul 02 10:27:05 2015 +0200 @@ -19,6 +19,20 @@ from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl +INDEXMETA_NS="http://md.mpiwg-berlin.mpg.de/ns/indexMeta#" + +def removeINDEXMETA_NS(root): #entfernt den namespace von indexmeta aus dem dom #TODO evertyhing should be changed that it can deal with NS + for elem in root.getiterator(): + print ("ETAG") + print(elem.tag) + if not hasattr(elem.tag, 'find'): continue # (1) + + i = elem.tag.find('{%s}'%INDEXMETA_NS) + if i >= 0: + elem.tag = elem.tag[i+len(('{%s}'%INDEXMETA_NS)):] + + print(elem.tag) + def getMDText(node): """returns the @text content from the MetaDataProvider metadata node""" if isinstance(node, dict): @@ -529,18 +543,52 @@ if mode=="texttool": # url points to document dir or index.meta metaDom = self.metadataService.getDomFromPathOrUrl(url) + removeINDEXMETA_NS(metaDom) + if metaDom is None: raise IOError("Unable to find index.meta for mode=texttool!") docUrl = url.replace('/index.meta', '') if url.startswith('/mpiwg/online/'): docUrl = url.replace('/mpiwg/online/', '', 1) + elif mode=="textpath": + #url points to an textfile + #index.meta optional + #assume index.meta in parent dir + docUrl = getParentPath(url) + docinfo['viewmode'] = "text" + + try: + metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + + removeINDEXMETA_NS(metaDom) + + + + except: + metaDom = None + #metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + #docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) + docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1) + docinfo['textURL'] = url + if docinfo.get("creator", None) is None: + docinfo['creator'] = "" + + if docinfo.get("title", None) is None: + docinfo['title'] = "" + + if docinfo.get("documentPath", None) is None: + docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1) + docinfo['documentPath'] = url.replace('/pages', '', 1) + + docinfo['numPages'] = 1 elif mode=="imagepath": # url points to folder with images, index.meta optional # asssume index.meta in parent dir - docUrl = getParentPath(url) + metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) elif mode=="hocr": @@ -574,12 +622,14 @@ docinfo['documentUrl'] = docUrl # process index.meta contents - if metaDom is not None and metaDom.tag == 'resource': + + if metaDom is not None and (metaDom.tag == 'resource' or metaDom.tag == "{%s}resource"%INDEXMETA_NS): + print("MD") # document directory name and path resource = self.metadataService.getResourceData(dom=metaDom, recursive=1) if resource: docinfo = self.getDocinfoFromResource(docinfo, resource) - + # texttool info texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True) if texttool: