--- ECHO_content/vlp_xmlhelpers.py 2006/11/22 17:21:10 1.10 +++ ECHO_content/vlp_xmlhelpers.py 2007/07/24 09:11:46 1.11 @@ -1,6 +1,7 @@ from sys import argv import string +import logging import xml.dom.minidom import Ft.Xml.XLink.Processor import Ft.Xml.XLink.XLinkElements @@ -15,6 +16,7 @@ from Ft.Xml import EMPTY_NAMESPACE from Ft.Lib import Uri import urllib import re +from ECHO_collection import unicodify,utf8ify patternTXT=r"<\s*txt.*?>(.*?)" regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL) @@ -175,7 +177,8 @@ def checkRef(self,ref): 'vl_people':'AND complete =\'yes\'', 'vl_sites':'AND complete =\'yes\'', 'vl_transcript':'AND complete =\'yes\'', - 'vl_essays':'AND online =\'yes\'' + 'vl_essays':'AND online =\'yes\'', + 'vl_categories':'' } res=None for db in dbs.keys(): @@ -188,7 +191,7 @@ def link2html(self,str): if str: str=re.sub("\&","&",str) - dom=xml.dom.minidom.parseString(""+str+"") + dom=xml.dom.minidom.parseString(""+utf8ify(str)+"") links=dom.getElementsByTagName("link") @@ -207,11 +210,11 @@ def link2html(self,str): newxml=dom.toxml('utf-8') retStr=regexpTXT.search(newxml) + retStr = retStr.group(1) - return retStr.group(1) + return retStr.decode('utf-8') # we return unicode - - return "" + return u"" def related2html(self,str): """related library items: xlinks in html wandeln / mb 22.11.2006""" @@ -258,10 +261,11 @@ def related2html(self,str): newxml=dom.toxml('utf-8') retStr=regexpTXT.search(newxml) - - return retStr.group(1) + retStr = retStr.group(1) + + return retStr.decode('utf-8') # we return unicode - return "" + return u"" @@ -308,7 +312,7 @@ def xml2html(self,str,quote="yes"): retStr=regexpPage.search(str) try: # hack warum fehtl manchmal page?? - return retStr.group(1) + return retStr.group(1).decode('utf-8') except: return str return ""