# HG changeset patch # User Dirk Wintergruen # Date 1435825873 -7200 # Node ID 5d1534bd19b3f655995026732139af5e12cf9069 # Parent 4a75a760def2668ba0886a9ee8ddf0fe8b732d9b# Parent 25295ceb11b1e52149464f53aa9296c065ecedcf merge diff -r 25295ceb11b1 -r 5d1534bd19b3 MpiwgXmlTextServer.py --- a/MpiwgXmlTextServer.py Thu Jun 04 16:23:29 2015 +0200 +++ b/MpiwgXmlTextServer.py Thu Jul 02 10:31:13 2015 +0200 @@ -303,8 +303,14 @@ textmode = 'plain' textParams['outputFormat'] = 'html' + + try: # fetch the page + + + + pagexml = self.getServerData("query/GetPage",urllib.urlencode(textParams)) dom = ET.fromstring(pagexml) except Exception, e: @@ -371,6 +377,18 @@ wtag.remove(wtag.find("span[@class='nodictionary norm']")) # delete non-matching children of a-tag and suppress remaining tag name atag = wtag.find("*[@class='dictionary']") + + if atag is None: #nicht gefunden weil noch andere Eintraege im class tag + for w in wtag.findall("a"): + val = w.attrib.get("class","") + if val.startswith("dictionary"): + atag=w + break + + + + + if normMode == 'orig': atag.remove(atag.find("span[@class='reg']")) atag.remove(atag.find("span[@class='norm']")) @@ -386,7 +404,21 @@ else: # delete a-tag - wtag.remove(wtag.find("*[@class='dictionary']")) + + + wt = wtag.find("*[@class='dictionary']") + + if wt is None: #nicht gefunden weil noch andere Eintraege im class tag vorhanden sind + for w in wtag.findall("a"): + val = w.attrib.get("class","") + if val.startswith("dictionary"): + wt=w + break + + + + + wtag.remove(wt) # delete non-matching children and suppress remaining tag name if normMode == 'orig': wtag.remove(wtag.find("span[@class='nodictionary reg']")) diff -r 25295ceb11b1 -r 5d1534bd19b3 css/docuviewer.css --- a/css/docuviewer.css Thu Jun 04 16:23:29 2015 +0200 +++ b/css/docuviewer.css Thu Jul 02 10:31:13 2015 +0200 @@ -3,6 +3,14 @@ * * Robert Casties 2012. */ + + + .iliese { + background-color: lime; + } + + + body { background-color: #ebebeb; margin: 5px; diff -r 25295ceb11b1 -r 5d1534bd19b3 documentViewer.py --- a/documentViewer.py Thu Jun 04 16:23:29 2015 +0200 +++ b/documentViewer.py Thu Jul 02 10:31:13 2015 +0200 @@ -19,6 +19,20 @@ from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl +INDEXMETA_NS="http://md.mpiwg-berlin.mpg.de/ns/indexMeta#" + +def removeINDEXMETA_NS(root): #entfernt den namespace von indexmeta aus dem dom #TODO evertyhing should be changed that it can deal with NS + for elem in root.getiterator(): + print ("ETAG") + print(elem.tag) + if not hasattr(elem.tag, 'find'): continue # (1) + + i = elem.tag.find('{%s}'%INDEXMETA_NS) + if i >= 0: + elem.tag = elem.tag[i+len(('{%s}'%INDEXMETA_NS)):] + + print(elem.tag) + def getMDText(node): """returns the @text content from the MetaDataProvider metadata node""" if isinstance(node, dict): @@ -529,18 +543,52 @@ if mode=="texttool": # url points to document dir or index.meta metaDom = self.metadataService.getDomFromPathOrUrl(url) + removeINDEXMETA_NS(metaDom) + if metaDom is None: raise IOError("Unable to find index.meta for mode=texttool!") docUrl = url.replace('/index.meta', '') if url.startswith('/mpiwg/online/'): docUrl = url.replace('/mpiwg/online/', '', 1) + elif mode=="textpath": + #url points to an textfile + #index.meta optional + #assume index.meta in parent dir + docUrl = getParentPath(url) + docinfo['viewmode'] = "text" + + try: + metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + + removeINDEXMETA_NS(metaDom) + + + + except: + metaDom = None + #metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + #docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) + docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1) + docinfo['textURL'] = url + if docinfo.get("creator", None) is None: + docinfo['creator'] = "" + + if docinfo.get("title", None) is None: + docinfo['title'] = "" + + if docinfo.get("documentPath", None) is None: + docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1) + docinfo['documentPath'] = url.replace('/pages', '', 1) + + docinfo['numPages'] = 1 elif mode=="imagepath": # url points to folder with images, index.meta optional # asssume index.meta in parent dir - docUrl = getParentPath(url) + metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) + docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) elif mode=="hocr": @@ -574,12 +622,14 @@ docinfo['documentUrl'] = docUrl # process index.meta contents - if metaDom is not None and metaDom.tag == 'resource': + + if metaDom is not None and (metaDom.tag == 'resource' or metaDom.tag == "{%s}resource"%INDEXMETA_NS): + print("MD") # document directory name and path resource = self.metadataService.getResourceData(dom=metaDom, recursive=1) if resource: docinfo = self.getDocinfoFromResource(docinfo, resource) - + # texttool info texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True) if texttool: