changeset 514:c55e376be01b

search works even with unicode...
author casties
date Tue, 28 Feb 2012 19:46:37 +0100
parents 67095296c95a
children 0afba3afd538
files SrvTxtUtils.py documentViewer.py version.txt zpt/viewer_images.zpt zpt/viewer_text.zpt
diffstat 5 files changed, 29 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/SrvTxtUtils.py	Tue Feb 28 19:10:08 2012 +0100
+++ b/SrvTxtUtils.py	Tue Feb 28 19:46:37 2012 +0100
@@ -11,7 +11,7 @@
 import logging
 
 
-srvTxtUtilsVersion = "1.3"
+srvTxtUtilsVersion = "1.4"
 
 def getInt(number, default=0):
     """returns always an int (0 in case of problems)"""
@@ -27,6 +27,28 @@
     except:
         return default
 
+def unicodify(s):
+    """decode str (utf-8 or latin-1 representation) into unicode object"""
+    if not s:
+        return u""
+    if isinstance(s, str):
+        try:
+            return s.decode('utf-8')
+        except:
+            return s.decode('latin-1')
+    else:
+        return s
+
+def utf8ify(s):
+    """encode unicode object or string into byte string in utf-8 representation.
+       assumes string objects to be utf-8"""
+    if not s:
+        return ""
+    if isinstance(s, str):
+        return s
+    else:
+        return s.encode('utf-8')
+
 def getText(node, recursive=0):
     """returns all text content of a node and its subnodes"""
     if node is None:
--- a/documentViewer.py	Tue Feb 28 19:10:08 2012 +0100
+++ b/documentViewer.py	Tue Feb 28 19:46:37 2012 +0100
@@ -17,7 +17,7 @@
 import re
 import string
 
-from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml
+from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml
     
 def serializeNode(node, encoding="utf-8"):
     """returns a string containing node as XML"""
@@ -381,7 +381,7 @@
         """returns URL to documentviewer with parameter param set to val or from dict params"""
         urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates)
         # quote values and assemble into query string (not escaping '/')
-        ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()])
+        ps = paramSep.join(["%s=%s"%(k, urllib.quote_plus(utf8ify(v), '/')) for (k, v) in urlParams.items()])
         if baseUrl is None:
             baseUrl = self.getDocumentViewerURL()
             
@@ -396,7 +396,7 @@
     def getInfo_xml(self,url,mode):
         """returns info about the document as XML"""
         if not self.digilibBaseUrl:
-            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
+            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
         
         docinfo = self.getDocinfo(mode=mode,url=url)
         pt = getattr(self.template, 'info_xml')
--- a/version.txt	Tue Feb 28 19:10:08 2012 +0100
+++ b/version.txt	Tue Feb 28 19:46:37 2012 +0100
@@ -1,1 +1,1 @@
-DocumentViewer 2.0a
\ No newline at end of file
+DocumentViewer 2.0b
\ No newline at end of file
--- a/zpt/viewer_images.zpt	Tue Feb 28 19:10:08 2012 +0100
+++ b/zpt/viewer_images.zpt	Tue Feb 28 19:46:37 2012 +0100
@@ -5,7 +5,7 @@
               tocMode pageinfo/tocMode; viewerUrl docinfo/viewerUrl;
               numPages docinfo/numPages | nothing; dlBaseUrl docinfo/digilibBaseUrl | nothing;">
 <head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
   <title
     tal:content="python:docinfo.get('creator',' ') + ' - ' + docinfo.get('title',' ')" />
   <link rel="stylesheet" href="template/docuviewer_css" type="text/css" />
--- a/zpt/viewer_text.zpt	Tue Feb 28 19:10:08 2012 +0100
+++ b/zpt/viewer_text.zpt	Tue Feb 28 19:46:37 2012 +0100
@@ -7,7 +7,7 @@
               rootUrl here/getDocumentViewerURL;
               numPages docinfo/numPages | nothing;">
 <head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
   <title
     tal:content="python:docinfo.get('creator',' ') + ' - ' + docinfo.get('title',' ')" />
   <link rel="stylesheet" href="template/docuviewer_css" type="text/css" />