Annotation of documentViewer/documentViewer.py, revision 1.4

1.1       dwinter     1: 
                      2: genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"
                      3: 
                      4: from OFS.Folder import Folder
                      5: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
                      6: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
                      7: from AccessControl import ClassSecurityInfo
                      8: from Globals import package_home
                      9: 
                     10: from Ft.Xml.Domlette import NonvalidatingReader
                     11: from Ft.Xml.Domlette import PrettyPrint, Print
                     12: from Ft.Xml import EMPTY_NAMESPACE
                     13: 
                     14: import Ft.Xml.XPath
                     15: 
                     16: import os.path
                     17: import cgi
                     18: import urllib
1.3       casties    19: import zLOG
1.1       dwinter    20: 
def getInt(number, default=0):
    """Return number converted to int, or default if it cannot be converted.

    @param number: any value accepted by int() (string, number, ...)
    @param default: value returned when the conversion fails (0 if omitted)
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed
        return default
        !            27:     
        !            28: 
def getTextFromNode(nodename):
    """Return the concatenated data of all direct text children of a DOM node.

    Only immediate child nodes of type TEXT_NODE are considered; text inside
    nested elements is not included.

    @param nodename: a DOM node (despite the parameter name, not a name)
    @return: the joined text, "" if the node has no text children
    """
    return "".join([node.data
                    for node in nodename.childNodes
                    if node.nodeType == node.TEXT_NODE])
                     36: 
                     37: import socket
                     38: 
                     39: def urlopen(url):
                     40:         """urlopen mit timeout"""
                     41:         socket.setdefaulttimeout(2)
                     42:         ret=urllib.urlopen(url)
                     43:         socket.setdefaulttimeout(5)
                     44:         return ret
                     45:     
                     46: def getParamFromDigilib(path,param):
                     47:     """gibt param von dlInfo aus"""
1.3       casties    48:     imageUrl=genericDigilib+"/dirInfo-xml.jsp?mo=dir&fn="+path
1.1       dwinter    49: 
1.3       casties    50:     zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo (%s) from %s"%(param,imageUrl))
                     51:     
1.1       dwinter    52:     try:
                     53:         dom = NonvalidatingReader.parseUri(imageUrl)
                     54:     except:
                     55:         return None
                     56:     
1.3       casties    57:     params=dom.xpath("//dir/%s"%param)
                     58:     zLOG.LOG("documentViewer (getparamfromdigilib)", zLOG.INFO, "dirInfo:%s"%params)
1.1       dwinter    59:     
                     60:     if params:
1.3       casties    61:         return getTextFromNode(params[0])
                     62: 
1.1       dwinter    63:     
                     64: 
                     65: 
1.3       casties    66: ##
                     67: ## documentViewer class
                     68: ##
                     69: class documentViewer(Folder):
1.1       dwinter    70:     """document viewer"""
                     71: 
                     72:     meta_type="Document viewer"
                     73:     
                     74:     security=ClassSecurityInfo()
1.3       casties    75:     manage_options=Folder.manage_options+(
1.1       dwinter    76:         {'label':'main config','action':'changeDocumentViewerForm'},
                     77:         )
                     78: 
1.3       casties    79:     # templates and forms
                     80:     viewer_main = PageTemplateFile('zpt/viewer_main', globals())
                     81:     thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
                     82:     image_main = PageTemplateFile('zpt/image_main', globals())
                     83:     head_main = PageTemplateFile('zpt/head_main', globals())
                     84:     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
                     85: 
                     86:     security.declareProtected('View management screens','changeDocumentViewerForm')    
                     87:     changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
                     88: 
1.1       dwinter    89:     
1.4     ! casties    90:     def __init__(self,id,imageViewerUrl,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10):
1.1       dwinter    91:         """init document viewer"""
                     92:         self.id=id
                     93:         self.title=title
                     94:         self.imageViewerUrl=imageViewerUrl
1.4     ! casties    95:         if not digilibBaseUrl:
1.3       casties    96:             self.digilibBaseUrl = self.findDigilibUrl()
1.4     ! casties    97:         else:
        !            98:             self.digilibBaseUrl = digilibBaseUrl
        !            99:         self.thumbcols = thumbcols
        !           100:         self.thumbrows = thumbrows
1.3       casties   101:         # add template folder so we can always use template.something
                    102:         self.manage_addFolder('template')
                    103: 
                    104: 
                    105:     security.declareProtected('View','index_html')
1.4     ! casties   106:     def index_html(self,mode,url,start=None,pn=1):
1.3       casties   107:         '''
                    108:         view it
                    109:         @param mode: defines which type of document is behind url
                    110:         @param url: url which contains display information
                    111:         '''
                    112:         
                    113:         zLOG.LOG("documentViewer (index)", zLOG.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
                    114:         print "dlbaseurl:", self.digilibBaseUrl
1.1       dwinter   115:         
1.3       casties   116:         if not hasattr(self, 'template'):
                    117:             # create template folder if it doesn't exist
                    118:             print "no template folder -- creating"
                    119:             self.manage_addFolder('template')
                    120:             
                    121:         if not self.digilibBaseUrl:
                    122:             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
                    123:             
                    124:         print "dlbaseurl:", self.digilibBaseUrl
                    125: 
1.4     ! casties   126:         docinfo = self.getDocinfo(mode=mode,url=url)
        !           127:         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
1.3       casties   128:         pt = getattr(self.template, 'viewer_main')
                    129:         return pt(docinfo=docinfo,pageinfo=pageinfo)
1.1       dwinter   130:   
                    131:   
1.4     ! casties   132:     def getLink(self,param=None,val=None):
        !           133:         """link to documentviewer with parameter param set to val"""
        !           134:         params=cgi.parse_qs(self.REQUEST['QUERY_STRING'])
        !           135:         if param is not None:
        !           136:             if val is None and params.has_key(param):
        !           137:                 del params[param]
        !           138:             else:
        !           139:                 params[param] = val
1.1       dwinter   140:         
1.4     ! casties   141:         url=self.REQUEST['URL']+"?"+urllib.urlencode(params, doseq=True)
        !           142:         return url
        !           143: 
        !           144:     
1.3       casties   145:     def getStyle(self, idx, selected, style=""):
1.4     ! casties   146:         """returns a string with the given style and append 'sel' if path == selected."""
1.3       casties   147:         #zLOG.LOG("documentViewer (getstyle)", zLOG.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
                    148:         if idx == selected:
                    149:             return style + 'sel'
                    150:         else:
                    151:             return style    
1.2       dwinter   152:         
                    153:         
1.3       casties   154:     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None):
                    155:         """gets bibliographical info from the index.meta file at url or given by dom"""
                    156:         zLOG.LOG("documentViewer (getbibinfofromindexmeta)", zLOG.INFO,"path: %s"%(path))
1.2       dwinter   157:         
1.3       casties   158:         if docinfo is None:
                    159:             docinfo = {}
                    160:             
                    161:         if dom is None:
                    162:             server="http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn="
                    163:             path="/".join(path.split("/")[0:-1])
                    164:             metaUrl=server+path+"/index.meta"
                    165:             try:
                    166:                 dom = NonvalidatingReader.parseUri(metaUrl)
                    167:             except:
                    168:                 return docinfo
1.2       dwinter   169:         
1.4     ! casties   170:         metaData=self.metadata.main.meta.bib
        !           171:         bibtype=dom.xpath("//bib/@type")
        !           172:         if bibtype and (len(bibtype)>0):
        !           173:             bibtype=bibtype[0].value
1.2       dwinter   174:         else:
1.4     ! casties   175:             bibtype="generic"
        !           176:         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
        !           177:         bibmap=metaData.generateMappingForType(bibtype)
        !           178:         if len(bibmap) > 0:
        !           179:             docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
        !           180:             docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
        !           181:             docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
1.3       casties   182:         
                    183:         return docinfo
                    184: 
                    185:         
                    186:     def getDocinfoFromTextTool(self,url,docinfo=None):
                    187:        """parse texttool tag in index meta"""
                    188:        zLOG.LOG("documentViewer (getdocinfofromtexttool)", zLOG.INFO,"url: %s"%(url))
                    189:        if docinfo is None:
                    190:            docinfo = {}
                    191:            
                    192:        try:
                    193:            dom = NonvalidatingReader.parseUri(url)
                    194:        except:
                    195:            zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2])
                    196:            return docinfo
                    197:        
                    198:        archivePaths=dom.xpath("//resource/archive-path")
                    199:        
                    200:        if archivePaths and (len(archivePaths)>0):
                    201:            archivePath=getTextFromNode(archivePaths[0])
                    202:        else:
                    203:            archivePath=None
                    204:        
                    205:        images=dom.xpath("//texttool/image")
                    206:        
                    207:        if images and (len(images)>0):
                    208:            image=getTextFromNode(images[0])
                    209:        else:
                    210:            image=None
                    211:            
                    212:        if image and archivePath:
                    213:            image=os.path.join(archivePath,image)
                    214:            image=image.replace("/mpiwg/online",'')
                    215:            pt=getParamFromDigilib(image,'size')
                    216:            docinfo['imagePath'] = image
1.4     ! casties   217:            docinfo['numPages'] = pt
1.3       casties   218:            
                    219:        viewerUrls=dom.xpath("//texttool/digiliburlprefix")
                    220:        
                    221:        if viewerUrls and (len(viewerUrls)>0):
                    222:            viewerUrl=getTextFromNode(viewerUrls[0])
                    223:            docinfo['imageURL'] = viewerURL
                    224:                   
                    225:        textUrls=dom.xpath("//texttool/text")
                    226:        
                    227:        if textUrls and (len(textUrls)>0):
                    228:            textUrl=getTextFromNode(textUrls[0])
                    229:            docinfo['textURL'] = textURL
                    230:                      
                    231:        docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom)
                    232:        return docinfo
                    233:    
                    234: 
                    235:     def getDocinfoFromImagePath(self,path,docinfo=None):
                    236:         """path ist the path to the images it assumes that the index.meta file is one level higher."""
                    237:         zLOG.LOG("documentViewer (getdocinfofromimagepath)", zLOG.INFO,"path: %s"%(path))
                    238:         if docinfo is None:
                    239:             docinfo = {}
                    240:         docinfo['imagePath'] = path
                    241:         path=path.replace("/mpiwg/online","")
                    242:         pt=getParamFromDigilib(path,'size')
1.4     ! casties   243:         docinfo['numPages'] = pt
1.3       casties   244:         imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%path
                    245:         docinfo['imageURL'] = imageUrl
                    246:         
                    247:         docinfo = self.getBibinfoFromIndexMeta(path,docinfo=docinfo)
                    248:         return docinfo
                    249:     
1.2       dwinter   250:     
1.3       casties   251:     def getDocinfo(self, mode, url):
                    252:         """returns docinfo depending on mode"""
                    253:         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"mode: %s, url: %s"%(mode,url))
                    254:         # look for cached docinfo in session
                    255:         if self.REQUEST.SESSION.has_key('docinfo'):
                    256:             docinfo = self.REQUEST.SESSION['docinfo']
                    257:             # check if its still current
                    258:             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
                    259:                 zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo in session: %s"%docinfo)
                    260:                 return docinfo
                    261:         # new docinfo
                    262:         docinfo = {'mode': mode, 'url': url}
                    263:         if mode=="texttool": #index.meta with texttool information
                    264:             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
                    265:         elif mode=="imagepath":
                    266:             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
                    267:         else:
                    268:             zLOG.LOG("documentViewer (getdocinfo)", zLOG.ERROR,"unknown mode!")
                    269:         zLOG.LOG("documentViewer (getdocinfo)", zLOG.INFO,"docinfo: %s"%docinfo)
                    270:         self.REQUEST.SESSION['docinfo'] = docinfo
                    271:         return docinfo
1.2       dwinter   272:         
                    273:         
1.4     ! casties   274:     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
1.3       casties   275:         """returns pageinfo with the given parameters"""
                    276:         pageinfo = {}
1.4     ! casties   277:         current = getInt(current)
        !           278:         pageinfo['current'] = current
        !           279:         rows = int(rows or self.thumbrows)
        !           280:         pageinfo['rows'] = rows
        !           281:         cols = int(cols or self.thumbcols)
        !           282:         pageinfo['cols'] = cols
        !           283:         grpsize = cols * rows
        !           284:         pageinfo['groupsize'] = grpsize
        !           285:         start = getInt(start, default=(int(current / grpsize) * grpsize +1))
        !           286:         print "start3:", start
1.3       casties   287:         pageinfo['start'] = start
1.4     ! casties   288:         pageinfo['end'] = start + grpsize
        !           289:         if docinfo is not None:
        !           290:             np = int(docinfo['numPages'])
        !           291:             pageinfo['end'] = min(pageinfo['end'], np)
        !           292:             pageinfo['numgroups'] = int(np / grpsize)
        !           293:             if np % grpsize > 0:
        !           294:                 pageinfo['numgroups'] += 1
        !           295:                 
1.3       casties   296:         return pageinfo
                    297:                 
1.1       dwinter   298:     def text(self,mode,url,pn):
                    299:         """give text"""
                    300:         if mode=="texttool": #index.meta with texttool information
                    301:             (viewerUrl,imagepath,textpath)=parseUrlTextTool(url)
                    302:         
                    303:         print textpath
                    304:         try:
                    305:             dom = NonvalidatingReader.parseUri(textpath)
                    306:         except:
                    307:             return None
                    308:     
                    309:         list=[]
                    310:         nodes=dom.xpath("//pb")
                    311: 
                    312:         node=nodes[int(pn)-1]
                    313:         
                    314:         p=node
                    315:         
                    316:         while p.tagName!="p":
                    317:             p=p.parentNode
                    318:         
                    319:         
                    320:         endNode=nodes[int(pn)]
                    321:         
                    322:         
                    323:         e=endNode
                    324:         
                    325:         while e.tagName!="p":
                    326:             e=e.parentNode
                    327:         
                    328:         
                    329:         next=node.parentNode
                    330:         
                    331:         #sammle s
                    332:         while next and (next!=endNode.parentNode):
                    333:             list.append(next)    
                    334:             next=next.nextSibling    
                    335:         list.append(endNode.parentNode)
                    336:         
                    337:         if p==e:# beide im selben paragraphen
1.2       dwinter   338:             pass
                    339: #    else:
                    340: #            next=p
                    341: #            while next!=e:
                    342: #                print next,e
                    343: #                list.append(next)
                    344: #                next=next.nextSibling
                    345: #            
                    346: #        for x in list:
                    347: #            PrettyPrint(x)
                    348: #
                    349: #        return list
1.3       casties   350: #
                    351: 
                    352:     def findDigilibUrl(self):
                    353:         """try to get the digilib URL from zogilib"""
                    354:         url = self.imageViewerUrl[:-1] + "/getScalerUrl"
                    355:         try:
                    356:             scaler = urlopen(url).read()
                    357:             return scaler.replace("/servlet/Scaler?", "")
                    358:         except:
                    359:             return None
                    360:     
1.4     ! casties   361:     def changeDocumentViewer(self,imageViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,RESPONSE=None):
1.3       casties   362:         """init document viewer"""
                    363:         self.title=title
                    364:         self.imageViewerUrl=imageViewerUrl
                    365:         self.digilibBaseUrl = digilibBaseUrl
1.4     ! casties   366:         self.thumbrows = thumbrows
        !           367:         self.thumbcols = thumbcols
1.1       dwinter   368:         
1.3       casties   369:         if RESPONSE is not None:
                    370:             RESPONSE.redirect('manage_main')
1.1       dwinter   371:     
                    372:     
                    373:         
                    374:         
                    375: #    security.declareProtected('View management screens','renameImageForm')
                    376: 
                    377: def manage_AddDocumentViewerForm(self):
                    378:     """add the viewer form"""
1.3       casties   379:     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
1.1       dwinter   380:     return pt()
                    381:   
                    382: def manage_AddDocumentViewer(self,id,imageViewerUrl="",title="",RESPONSE=None):
                    383:     """add the viewer"""
                    384:     newObj=documentViewer(id,imageViewerUrl,title)
                    385:     self._setObject(id,newObj)
                    386:     
                    387:     if RESPONSE is not None:
                    388:         RESPONSE.redirect('manage_main')
1.3       casties   389: 
                    390: 
                    391: ##
                    392: ## DocumentViewerTemplate class
                    393: ##
                    394: class DocumentViewerTemplate(ZopePageTemplate):
                    395:     """Template for document viewer"""
                    396:     meta_type="DocumentViewer Template"
                    397: 
                    398: 
                    399: def manage_addDocumentViewerTemplateForm(self):
                    400:     """Form for adding"""
                    401:     pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
                    402:     return pt()
                    403: 
                    404: def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
                    405:                            REQUEST=None, submit=None):
                    406:     "Add a Page Template with optional file content."
                    407: 
                    408:     self._setObject(id, DocumentViewerTemplate(id))
                    409:     ob = getattr(self, id)
                    410:     ob.pt_edit(open(os.path.join(package_home(globals()),'zpt/viewer_main.zpt')).read(),None)
                    411:     if title:
                    412:         ob.pt_setTitle(title)
                    413:     try:
                    414:         u = self.DestinationURL()
                    415:     except AttributeError:
                    416:         u = REQUEST['URL1']
                    417:         
                    418:     u = "%s/%s" % (u, urllib.quote(id))
                    419:     REQUEST.RESPONSE.redirect(u+'/manage_main')
                    420:     return ''
                    421: 
                    422: 
1.1       dwinter   423:     

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>