|
|
| version 1.69.2.3, 2010/06/16 18:27:04 | version 1.99, 2010/09/01 13:25:38 |
|---|---|
| Line 2 | Line 2 |
| from OFS.Folder import Folder | from OFS.Folder import Folder |
| from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate | from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate |
| from Products.PageTemplates.PageTemplateFile import PageTemplateFile | from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
| from Products.PythonScripts.standard import url_quote | |
| from AccessControl import ClassSecurityInfo | from AccessControl import ClassSecurityInfo |
| from AccessControl import getSecurityManager | from AccessControl import getSecurityManager |
| from Globals import package_home | from Globals import package_home |
| from Ft.Xml.Domlette import NonvalidatingReader | |
| from Ft.Xml.Domlette import PrettyPrint, Print | |
| from Ft.Xml import EMPTY_NAMESPACE, Parse | from Ft.Xml import EMPTY_NAMESPACE, Parse |
| import Ft.Xml.Domlette | |
| from xml.dom.minidom import parse, parseString | |
| from extraFunction import * | |
| import Ft.Xml.XPath | |
| import cStringIO | |
| import xmlrpclib | |
| import os.path | import os.path |
| import sys | import sys |
| import cgi | |
| import urllib | import urllib |
| import urllib2 | |
| import logging | import logging |
| import math | import math |
| import urlparse | import urlparse |
| from types import * | import cStringIO |
| import re | |
| def logger(txt,method,txt2): | def logger(txt,method,txt2): |
| """logging""" | """logging""" |
| Line 55 def getTextFromNode(nodename): | Line 44 def getTextFromNode(nodename): |
| def serializeNode(node, encoding='utf-8'): | def serializeNode(node, encoding='utf-8'): |
| """returns a string containing node as XML""" | """returns a string containing node as XML""" |
| buf = cStringIO.StringIO() | buf = cStringIO.StringIO() |
| Print(node, stream=buf, encoding=encoding) | Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding) |
| s = buf.getvalue() | s = buf.getvalue() |
| buf.close() | buf.close() |
| return s | return s |
| Line 84 def getHttpData(url, data=None, num_trie | Line 73 def getHttpData(url, data=None, num_trie |
| if sys.version_info < (2, 6): | if sys.version_info < (2, 6): |
| # set timeout on socket -- ugly :-( | # set timeout on socket -- ugly :-( |
| import socket | import socket |
| socket.setdefaulttimeout(timeout) | socket.setdefaulttimeout(float(timeout)) |
| response = urllib2.urlopen(url) | response = urllib2.urlopen(url) |
| else: | else: |
| response = urllib2.urlopen(url,timeout=float(timeout)) | response = urllib2.urlopen(url,timeout=float(timeout)) |
| Line 131 class documentViewer(Folder): | Line 120 class documentViewer(Folder): |
| page_main_images = PageTemplateFile('zpt/page_main_images', globals()) | page_main_images = PageTemplateFile('zpt/page_main_images', globals()) |
| page_main_text = PageTemplateFile('zpt/page_main_text', globals()) | page_main_text = PageTemplateFile('zpt/page_main_text', globals()) |
| page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) | page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) |
| page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals()) | |
| page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) | page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) |
| head_main = PageTemplateFile('zpt/head_main', globals()) | head_main = PageTemplateFile('zpt/head_main', globals()) |
| docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) | docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) |
| Line 157 class documentViewer(Folder): | Line 147 class documentViewer(Folder): |
| self._setObject('template',templateFolder) # old style | self._setObject('template',templateFolder) # old style |
| try: | try: |
| import MpdlXmlTextServer | import MpdlXmlTextServer |
| textServer = MpdlXmlTextServer(id='fulltextclient') | textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName) |
| #templateFolder['fulltextclient'] = xmlRpcClient | #templateFolder['fulltextclient'] = xmlRpcClient |
| templateFolder._setObject('fulltextclient',textServer) | templateFolder._setObject('fulltextclient',textServer) |
| except Exception, e: | except Exception, e: |
| Line 184 class documentViewer(Folder): | Line 174 class documentViewer(Folder): |
| """get search""" | """get search""" |
| return self.template.fulltextclient.getSearch(**args) | return self.template.fulltextclient.getSearch(**args) |
| def getNumPages(self, **args): | def getNumPages(self, docinfo): |
| """get numpages""" | """get numpages""" |
| return self.template.fulltextclient.getNumPages(**args) | return self.template.fulltextclient.getNumPages(docinfo) |
| def getTranslate(self, **args): | def getTranslate(self, **args): |
| """get translate""" | """get translate""" |
| Line 229 class documentViewer(Folder): | Line 219 class documentViewer(Folder): |
| pt = getattr(self.template, 'thumbs_main_rss') | pt = getattr(self.template, 'thumbs_main_rss') |
| if viewMode=="auto": # automodus gewaehlt | if viewMode=="auto": # automodus gewaehlt |
| if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert | if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert |
| viewMode="text" | viewMode="text" |
| else: | else: |
| viewMode="images" | viewMode="images" |
| Line 237 class documentViewer(Folder): | Line 227 class documentViewer(Folder): |
| return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) | return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
| security.declareProtected('View','index_html') | security.declareProtected('View','index_html') |
| def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None): | def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""): |
| ''' | ''' |
| view it | view it |
| @param mode: defines how to access the document behind url | @param mode: defines how to access the document behind url |
| @param url: url which contains display information | @param url: url which contains display information |
| @param viewMode: if images display images, if text display text, default is auto (text,images or auto) | @param viewMode: if images display images, if text display text, default is auto (text,images or auto) |
| @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) | @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) |
| @param characterNormalization type of text display (reg, norm, none) | |
| @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) | @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) |
| ''' | ''' |
| Line 255 class documentViewer(Folder): | Line 246 class documentViewer(Folder): |
| return "ERROR: template folder missing!" | return "ERROR: template folder missing!" |
| if not getattr(self, 'digilibBaseUrl', None): | if not getattr(self, 'digilibBaseUrl', None): |
| self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" | self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" |
| docinfo = self.getDocinfo(mode=mode,url=url) | docinfo = self.getDocinfo(mode=mode,url=url) |
| if tocMode != "thumbs": | if tocMode != "thumbs": |
| # get table of contents | # get table of contents |
| docinfo = self.getToc(mode=tocMode, docinfo=docinfo) | docinfo = self.getToc(mode=tocMode, docinfo=docinfo) |
| if viewMode=="auto": # automodus gewaehlt | if viewMode=="auto": # automodus gewaehlt |
| if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert | if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert |
| viewMode="text_dict" | viewMode="text_dict" |
| else: | else: |
| viewMode="images" | viewMode="images" |
| Line 279 class documentViewer(Folder): | Line 269 class documentViewer(Folder): |
| ret="" | ret="" |
| if mk is None: | if mk is None: |
| return "" | return "" |
| if type(mk) is not ListType: | if not isinstance(mk, list): |
| mk=[mk] | mk=[mk] |
| for m in mk: | for m in mk: |
| ret+="mk=%s"%m | ret+="mk=%s"%m |
| Line 318 class documentViewer(Folder): | Line 308 class documentViewer(Folder): |
| params["url"] = getParentDir(params["url"]) | params["url"] = getParentDir(params["url"]) |
| # quote values and assemble into query string | # quote values and assemble into query string |
| ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) | #ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) |
| ps = urllib.urlencode(params) | |
| url=self.REQUEST['URL1']+"?"+ps | url=self.REQUEST['URL1']+"?"+ps |
| return url | return url |
| Line 359 class documentViewer(Folder): | Line 350 class documentViewer(Folder): |
| elif access is None or access in self.authgroups: | elif access is None or access in self.authgroups: |
| # only local access -- only logged in users | # only local access -- only logged in users |
| user = getSecurityManager().getUser() | user = getSecurityManager().getUser() |
| logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) | |
| if user is not None: | if user is not None: |
| #print "user: ", user | #print "user: ", user |
| return (user.getUserName() != "Anonymous User") | return (user.getUserName() != "Anonymous User") |
| else: | else: |
| return False | return False |
| logging.debug("documentViewer (accessOK) unknown access type %s"%access) | logging.error("documentViewer (accessOK) unknown access type %s"%access) |
| return False | return False |
| Line 399 class documentViewer(Folder): | Line 391 class documentViewer(Folder): |
| return docinfo | return docinfo |
| def getIndexMetaPath(self,url): | |
| """gib nur den Pfad zurueck""" | |
| regexp = re.compile(r".*(experimental|permanent)/(.*)") | |
| regpath = regexp.match(url) | |
| if (regpath==None): | |
| return "" | |
| return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)) | |
| def getIndexMetaUrl(self,url): | |
| """returns utr of index.meta document at url""" | |
| def getIndexMeta(self, url): | |
| """returns dom of index.meta document at url""" | |
| dom = None | |
| metaUrl = None | metaUrl = None |
| if url.startswith("http://"): | if url.startswith("http://"): |
| # real URL | # real URL |
| Line 414 class documentViewer(Folder): | Line 414 class documentViewer(Folder): |
| if not metaUrl.endswith("index.meta"): | if not metaUrl.endswith("index.meta"): |
| metaUrl += "/index.meta" | metaUrl += "/index.meta" |
| logging.debug("(getIndexMeta): METAURL: %s"%metaUrl) | return metaUrl |
| def getDomFromIndexMeta(self, url): | |
| """get dom from index meta""" | |
| dom = None | |
| metaUrl = self.getIndexMetaUrl(url) | |
| logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl) | |
| txt=getHttpData(metaUrl) | txt=getHttpData(metaUrl) |
| if txt is None: | if txt is None: |
| raise IOError("Unable to read index meta from %s"%(url)) | raise IOError("Unable to read index meta from %s"%(url)) |
| Line 454 class documentViewer(Folder): | Line 461 class documentViewer(Folder): |
| if dom is None: | if dom is None: |
| for x in range(cut): | for x in range(cut): |
| path=getParentDir(path) | path=getParentDir(path) |
| dom = self.getIndexMeta(path) | dom = self.getDomFromIndexMeta(path) |
| acctype = dom.xpath("//access-conditions/access/@type") | acctype = dom.xpath("//access-conditions/access/@type") |
| if acctype and (len(acctype)>0): | if acctype and (len(acctype)>0): |
| Line 476 class documentViewer(Folder): | Line 483 class documentViewer(Folder): |
| if dom is None: | if dom is None: |
| for x in range(cut): | for x in range(cut): |
| path=getParentDir(path) | path=getParentDir(path) |
| dom = self.getIndexMeta(path) | dom = self.getDomFromIndexMeta(path) |
| docinfo['indexMetaPath']=self.getIndexMetaPath(path); | |
| logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) | logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) |
| # put in all raw bib fields as dict "bib" | # put in all raw bib fields as dict "bib" |
| Line 498 class documentViewer(Folder): | Line 507 class documentViewer(Folder): |
| bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) | bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) |
| docinfo['bib_type'] = bibtype | docinfo['bib_type'] = bibtype |
| bibmap=metaData.generateMappingForType(bibtype) | bibmap=metaData.generateMappingForType(bibtype) |
| logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap)) | |
| logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype)) | |
| # if there is no mapping bibmap is empty (mapping sometimes has empty fields) | # if there is no mapping bibmap is empty (mapping sometimes has empty fields) |
| if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: | if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: |
| try: | try: |
| Line 526 class documentViewer(Folder): | Line 537 class documentViewer(Folder): |
| if docinfo.get('lang', None) is None: | if docinfo.get('lang', None) is None: |
| docinfo['lang'] = '' # default keine Sprache gesetzt | docinfo['lang'] = '' # default keine Sprache gesetzt |
| if dom is None: | if dom is None: |
| dom = self.getIndexMeta(url) | dom = self.getDomFromIndexMeta(url) |
| archivePath = None | archivePath = None |
| archiveName = None | archiveName = None |
| Line 581 class documentViewer(Folder): | Line 592 class documentViewer(Folder): |
| viewerUrl = getTextFromNode(viewerUrls[0]) | viewerUrl = getTextFromNode(viewerUrls[0]) |
| docinfo['viewerURL'] = viewerUrl | docinfo['viewerURL'] = viewerUrl |
| # old style text URL | |
| textUrls = dom.xpath("//texttool/text") | textUrls = dom.xpath("//texttool/text") |
| if textUrls and (len(textUrls) > 0): | if textUrls and (len(textUrls) > 0): |
| textUrl = getTextFromNode(textUrls[0]) | textUrl = getTextFromNode(textUrls[0]) |
| Line 592 class documentViewer(Folder): | Line 604 class documentViewer(Folder): |
| docinfo['textURL'] = textUrl | docinfo['textURL'] = textUrl |
| # new style text-url-path | |
| textUrls = dom.xpath("//texttool/text-url-path") | textUrls = dom.xpath("//texttool/text-url-path") |
| if textUrls and (len(textUrls) > 0): | if textUrls and (len(textUrls) > 0): |
| textUrl = getTextFromNode(textUrls[0]) | textUrl = getTextFromNode(textUrls[0]) |
| docinfo['textURLPath'] = textUrl | docinfo['textURLPath'] = textUrl |
| if not docinfo['imagePath']: | if not docinfo['imagePath']: |
| # text-only, no page images | # text-only, no page images |
| docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht | docinfo = self.getNumPages(docinfo) |
| presentationUrls = dom.xpath("//texttool/presentation") | presentationUrls = dom.xpath("//texttool/presentation") |
| docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag | docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag |
| Line 685 class documentViewer(Folder): | Line 698 class documentViewer(Folder): |
| self.REQUEST.SESSION['docinfo'] = docinfo | self.REQUEST.SESSION['docinfo'] = docinfo |
| return docinfo | return docinfo |
| def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): | def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""): |
| """returns pageinfo with the given parameters""" | """returns pageinfo with the given parameters""" |
| pageinfo = {} | pageinfo = {} |
| current = getInt(current) | current = getInt(current) |
| Line 708 class documentViewer(Folder): | Line 721 class documentViewer(Folder): |
| pageinfo['numgroups'] += 1 | pageinfo['numgroups'] += 1 |
| pageinfo['viewMode'] = viewMode | pageinfo['viewMode'] = viewMode |
| pageinfo['tocMode'] = tocMode | pageinfo['tocMode'] = tocMode |
| #pageinfo['characterNormalization'] =characterNormalization | |
| pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ') | |
| pageinfo['query'] = self.REQUEST.get('query',' ') | pageinfo['query'] = self.REQUEST.get('query',' ') |
| pageinfo['queryType'] = self.REQUEST.get('queryType',' ') | pageinfo['queryType'] = self.REQUEST.get('queryType',' ') |
| pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') | pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') |