--- ECHO_content/vlp_xmlhelpers.py 2006/11/22 17:21:10 1.10
+++ ECHO_content/vlp_xmlhelpers.py 2007/07/24 09:11:46 1.11
@@ -1,6 +1,7 @@
from sys import argv
import string
+import logging
import xml.dom.minidom
import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements
@@ -15,6 +16,7 @@ from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri
import urllib
import re
+from ECHO_collection import unicodify,utf8ify
patternTXT=r"<\s*txt.*?>(.*?)"
regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
@@ -175,7 +177,8 @@ def checkRef(self,ref):
'vl_people':'AND complete =\'yes\'',
'vl_sites':'AND complete =\'yes\'',
'vl_transcript':'AND complete =\'yes\'',
- 'vl_essays':'AND online =\'yes\''
+ 'vl_essays':'AND online =\'yes\'',
+ 'vl_categories':''
}
res=None
for db in dbs.keys():
@@ -188,7 +191,7 @@ def link2html(self,str):
if str:
str=re.sub("\&","&",str)
- dom=xml.dom.minidom.parseString(""+str+"")
+ dom=xml.dom.minidom.parseString(""+utf8ify(str)+"")
links=dom.getElementsByTagName("link")
@@ -207,11 +210,11 @@ def link2html(self,str):
newxml=dom.toxml('utf-8')
retStr=regexpTXT.search(newxml)
+ retStr = retStr.group(1)
- return retStr.group(1)
+ return retStr.decode('utf-8') # we return unicode
-
- return ""
+ return u""
def related2html(self,str):
"""related library items: xlinks in html wandeln / mb 22.11.2006"""
@@ -258,10 +261,11 @@ def related2html(self,str):
newxml=dom.toxml('utf-8')
retStr=regexpTXT.search(newxml)
-
- return retStr.group(1)
+ retStr = retStr.group(1)
+
+ return retStr.decode('utf-8') # we return unicode
- return ""
+ return u""
@@ -308,7 +312,7 @@ def xml2html(self,str,quote="yes"):
retStr=regexpPage.search(str)
try: # hack warum fehtl manchmal page??
- return retStr.group(1)
+ return retStr.group(1).decode('utf-8')
except:
return str
return ""