changeset 633:5d1534bd19b3

merge
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Thu, 02 Jul 2015 10:31:13 +0200
parents 4a75a760def2 (diff) 25295ceb11b1 (current diff)
children 618b600c805a
files
diffstat 3 files changed, 94 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/MpiwgXmlTextServer.py	Thu Jun 04 16:23:29 2015 +0200
+++ b/MpiwgXmlTextServer.py	Thu Jul 02 10:31:13 2015 +0200
@@ -303,8 +303,14 @@
             textmode = 'plain'
             textParams['outputFormat'] = 'html'
         
+        
+      
         try:
             # fetch the page
+            
+           
+            
+            
             pagexml = self.getServerData("query/GetPage",urllib.urlencode(textParams))
             dom = ET.fromstring(pagexml)
         except Exception, e:
@@ -371,6 +377,18 @@
                 wtag.remove(wtag.find("span[@class='nodictionary norm']"))
                 # delete non-matching children of a-tag and suppress remaining tag name
                 atag = wtag.find("*[@class='dictionary']")
+                
+                if atag is None: # not found because the class attribute holds additional entries
+                    for w in wtag.findall("a"):  
+                        val = w.attrib.get("class","")
+                        if val.startswith("dictionary"):
+                                atag=w
+                                break
+                
+                
+                
+                
+                
                 if normMode == 'orig':
                     atag.remove(atag.find("span[@class='reg']"))
                     atag.remove(atag.find("span[@class='norm']"))
@@ -386,7 +404,21 @@
                     
             else:
                 # delete a-tag
-                wtag.remove(wtag.find("*[@class='dictionary']"))
+               
+
+                wt =  wtag.find("*[@class='dictionary']")  
+                
+                if wt is None: # not found because the class attribute holds additional entries
+                    for w in wtag.findall("a"):  
+                        val = w.attrib.get("class","")
+                        if val.startswith("dictionary"):
+                                wt=w
+                                break
+                
+                
+                
+                 
+                wtag.remove(wt)
                 # delete non-matching children and suppress remaining tag name
                 if normMode == 'orig':
                     wtag.remove(wtag.find("span[@class='nodictionary reg']"))
--- a/css/docuviewer.css	Thu Jun 04 16:23:29 2015 +0200
+++ b/css/docuviewer.css	Thu Jul 02 10:31:13 2015 +0200
@@ -3,6 +3,14 @@
  * 
  * Robert Casties 2012.
  */
+ 
+ 
+ .iliese {
+ background-color: lime;
+ }
+ 
+ 
+ 
 body {
     background-color: #ebebeb;
     margin: 5px;
--- a/documentViewer.py	Thu Jun 04 16:23:29 2015 +0200
+++ b/documentViewer.py	Thu Jul 02 10:31:13 2015 +0200
@@ -19,6 +19,20 @@
 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl
     
 
+INDEXMETA_NS="http://md.mpiwg-berlin.mpg.de/ns/indexMeta#"
+
+def removeINDEXMETA_NS(root): # removes the indexMeta namespace from the element tags of the DOM, in place # TODO: everything should be changed so that it can deal with namespaces
+    for elem in root.getiterator():
+        print ("ETAG")
+        print(elem.tag)
+        if not hasattr(elem.tag, 'find'): continue  # skip nodes whose tag has no find() method (i.e. is not a string)
+        # look for the '{namespace}' prefix in the tag; find() returns -1 when it is absent
+        i = elem.tag.find('{%s}'%INDEXMETA_NS)
+        if i >= 0:
+            elem.tag = elem.tag[i+len(('{%s}'%INDEXMETA_NS)):]
+        # NOTE(review): the print calls look like leftover debug output; root=None raises AttributeError at getiterator()
+        print(elem.tag)
+
 def getMDText(node):
     """returns the @text content from the MetaDataProvider metadata node"""
     if isinstance(node, dict):
@@ -529,18 +543,52 @@
         if mode=="texttool": 
             # url points to document dir or index.meta
             metaDom = self.metadataService.getDomFromPathOrUrl(url)
+            removeINDEXMETA_NS(metaDom)
+
             if metaDom is None:
                 raise IOError("Unable to find index.meta for mode=texttool!")
             
             docUrl = url.replace('/index.meta', '')
             if url.startswith('/mpiwg/online/'):
                 docUrl = url.replace('/mpiwg/online/', '', 1)
+        elif mode=="textpath":
+            # url points to a text file
+            #index.meta optional
+            #assume index.meta in parent dir
+            docUrl = getParentPath(url)
+            docinfo['viewmode'] = "text"
+            
+            try:
+                metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
+
+                removeINDEXMETA_NS(metaDom)
+
+
+                
+            except:
+                metaDom = None
+            #metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
+            #docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
+            docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1)
+            docinfo['textURL'] = url
+            if docinfo.get("creator", None) is None:
+                docinfo['creator'] = "" 
+            
+            if docinfo.get("title", None) is None:
+                docinfo['title'] = "" 
+
+            if docinfo.get("documentPath", None) is None:
+                docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1)
+                docinfo['documentPath'] = url.replace('/pages', '', 1)
+                
+            docinfo['numPages'] = 1
 
         elif mode=="imagepath":
             # url points to folder with images, index.meta optional
             # asssume index.meta in parent dir
-            docUrl = getParentPath(url)
+            
             metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
+            
             docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
             
         elif mode=="hocr":
@@ -574,12 +622,14 @@
         
         docinfo['documentUrl'] = docUrl
         # process index.meta contents
-        if metaDom is not None and metaDom.tag == 'resource':
+        
+        if metaDom is not None and (metaDom.tag == 'resource' or metaDom.tag == "{%s}resource"%INDEXMETA_NS):
+            print("MD")
             # document directory name and path
             resource = self.metadataService.getResourceData(dom=metaDom, recursive=1)
             if resource:
                 docinfo = self.getDocinfoFromResource(docinfo, resource)
-
+           
             # texttool info
             texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
             if texttool: