Mercurial > hg > documentViewer

--- a/documentViewer.py	Tue May 26 10:58:27 2015 +0200
+++ b/documentViewer.py	Thu Jul 02 10:27:05 2015 +0200
@@ -19,6 +19,20 @@
 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl


+INDEXMETA_NS="http://md.mpiwg-berlin.mpg.de/ns/indexMeta#"
+
+def removeINDEXMETA_NS(root):
+    """Strip the index.meta namespace from all element tags below root, in place.
+
+    A root of None is ignored. TODO: make everything namespace-aware instead.
+    """
+    if root is None: return  # the texttool caller tests for None only after this call
+    prefix = '{%s}' % INDEXMETA_NS
+    for elem in root.getiterator():
+        i = elem.tag.find(prefix) if hasattr(elem.tag, 'find') else -1  # -1: non-string tag
+        if i >= 0:
+            elem.tag = elem.tag[i + len(prefix):]
+
 def getMDText(node):
     """returns the @text content from the MetaDataProvider metadata node"""
     if isinstance(node, dict):
@@ -529,18 +543,52 @@
         if mode=="texttool":
             # url points to document dir or index.meta
             metaDom = self.metadataService.getDomFromPathOrUrl(url)
+            removeINDEXMETA_NS(metaDom)
+
             if metaDom is None:
                 raise IOError("Unable to find index.meta for mode=texttool!")

             docUrl = url.replace('/index.meta', '')
             if url.startswith('/mpiwg/online/'):
                 docUrl = url.replace('/mpiwg/online/', '', 1)
+        elif mode=="textpath":
+            # url points to a text file
+            # index.meta is optional
+            # assume index.meta is in the parent dir
+            docUrl = getParentPath(url)
+            docinfo['viewmode'] = "text"
+
+            try:
+                metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
+
+                removeINDEXMETA_NS(metaDom)
+
+
+
+            except:
+                metaDom = None
+            #metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
+            #docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
+            docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1)
+            docinfo['textURL'] = url
+            if docinfo.get("creator", None) is None:
+                docinfo['creator'] = ""
+
+            if docinfo.get("title", None) is None:
+                docinfo['title'] = ""
+
+            if docinfo.get("documentPath", None) is None:
+                docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1)
+                docinfo['documentPath'] = url.replace('/pages', '', 1)
+
+            docinfo['numPages'] = 1

         elif mode=="imagepath":
             # url points to folder with images, index.meta optional
             # asssume index.meta in parent dir
-            docUrl = getParentPath(url)
+
             metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
+
             docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)

         elif mode=="hocr":
@@ -574,12 +622,14 @@

         docinfo['documentUrl'] = docUrl
         # process index.meta contents
-        if metaDom is not None and metaDom.tag == 'resource':
+
+        if metaDom is not None and (metaDom.tag == 'resource' or metaDom.tag == "{%s}resource"%INDEXMETA_NS):
+            print("MD")
             # document directory name and path
             resource = self.metadataService.getResourceData(dom=metaDom, recursive=1)
             if resource:
                 docinfo = self.getDocinfoFromResource(docinfo, resource)
-
+
             # texttool info
             texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
             if texttool: