# HG changeset patch # User casties # Date 1330454797 -3600 # Node ID c55e376be01bb68e1a4400a7a193758e9f5c863c # Parent 67095296c95a4a586d23fa9e442eab39245ca0c0 search works even with unicode... diff -r 67095296c95a -r c55e376be01b SrvTxtUtils.py --- a/SrvTxtUtils.py Tue Feb 28 19:10:08 2012 +0100 +++ b/SrvTxtUtils.py Tue Feb 28 19:46:37 2012 +0100 @@ -11,7 +11,7 @@ import logging -srvTxtUtilsVersion = "1.3" +srvTxtUtilsVersion = "1.4" def getInt(number, default=0): """returns always an int (0 in case of problems)""" @@ -27,6 +27,28 @@ except: return default +def unicodify(s): + """decode str (utf-8 or latin-1 representation) into unicode object""" + if not s: + return u"" + if isinstance(s, str): + try: + return s.decode('utf-8') + except: + return s.decode('latin-1') + else: + return s + +def utf8ify(s): + """encode unicode object or string into byte string in utf-8 representation. + assumes string objects to be utf-8""" + if not s: + return "" + if isinstance(s, str): + return s + else: + return s.encode('utf-8') + def getText(node, recursive=0): """returns all text content of a node and its subnodes""" if node is None: diff -r 67095296c95a -r c55e376be01b documentViewer.py --- a/documentViewer.py Tue Feb 28 19:10:08 2012 +0100 +++ b/documentViewer.py Tue Feb 28 19:46:37 2012 +0100 @@ -17,7 +17,7 @@ import re import string -from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml +from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml def serializeNode(node, encoding="utf-8"): """returns a string containing node as XML""" @@ -381,7 +381,7 @@ """returns URL to documentviewer with parameter param set to val or from dict params""" urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates) # quote values and assemble into query string (not escaping '/') - ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()]) + ps = paramSep.join(["%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')) for (k, v) in urlParams.items()]) if baseUrl is None: baseUrl = self.getDocumentViewerURL() @@ -396,7 +396,7 @@ def getInfo_xml(self,url,mode): """returns info about the document as XML""" if not self.digilibBaseUrl: - self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" + self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) pt = getattr(self.template, 'info_xml') diff -r 67095296c95a -r c55e376be01b version.txt --- a/version.txt Tue Feb 28 19:10:08 2012 +0100 +++ b/version.txt Tue Feb 28 19:46:37 2012 +0100 @@ -1,1 +1,1 @@ -DocumentViewer 2.0a \ No newline at end of file +DocumentViewer 2.0b \ No newline at end of file diff -r 67095296c95a -r c55e376be01b zpt/viewer_images.zpt --- a/zpt/viewer_images.zpt Tue Feb 28 19:10:08 2012 +0100 +++ b/zpt/viewer_images.zpt Tue Feb 28 19:46:37 2012 +0100 @@ -5,7 +5,7 @@ tocMode pageinfo/tocMode; viewerUrl docinfo/viewerUrl; numPages docinfo/numPages | nothing; dlBaseUrl docinfo/digilibBaseUrl | nothing;"> - + <link rel="stylesheet" href="template/docuviewer_css" type="text/css" /> diff -r 67095296c95a -r c55e376be01b zpt/viewer_text.zpt --- a/zpt/viewer_text.zpt Tue Feb 28 19:10:08 2012 +0100 +++ b/zpt/viewer_text.zpt Tue Feb 28 19:46:37 2012 +0100 @@ -7,7 +7,7 @@ rootUrl here/getDocumentViewerURL; numPages docinfo/numPages | nothing;"> <head> -<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> <title tal:content="python:docinfo.get('creator',' ') + ' - ' + docinfo.get('title',' ')" /> <link rel="stylesheet" href="template/docuviewer_css" type="text/css" />