comparison documentViewer.py @ 464:19bd41d95f62 elementtree

first version with new getdocinfo
author casties
date Fri, 29 Jul 2011 18:27:24 +0200
parents 89ad327b4bbd
children 224aad394350
comparison
equal deleted inserted replaced
463:89ad327b4bbd 464:19bd41d95f62
96 bt['isIEMac'] = bt['isIE'] and bt['isMac'] 96 bt['isIEMac'] = bt['isIE'] and bt['isMac']
97 bt['staticHTML'] = False 97 bt['staticHTML'] = False
98 98
99 return bt 99 return bt
100 100
def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing segments.

    Trailing slashes are removed before the path is split on '/'.

    @param path: '/'-separated path
    @param cnt: number of trailing segments to remove (default 1);
        values <= 0 leave the path unshortened
    @return: the shortened path without trailing '/', '' if no segment is left
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # [0:-0] would be the empty slice and drop the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[:-cnt])
107
105 108
106 ## 109 ##
107 ## documentViewer class 110 ## documentViewer class
108 ## 111 ##
109 class documentViewer(Folder): 112 class documentViewer(Folder):
261 264
262 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 265 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
263 266
264 security.declareProtected('View','index_html') 267 security.declareProtected('View','index_html')
265 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): 268 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None):
266 ''' 269 """
267 view it 270 view it
268 @param mode: defines how to access the document behind url 271 @param mode: defines how to access the document behind url
269 @param url: url which contains display information 272 @param url: url which contains display information
270 @param viewMode: if images display images, if text display text, default is auto (text,images or auto) 273 @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
271 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 274 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
272 @param characterNormalization type of text display (reg, norm, none) 275 """
273 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
274 '''
275 276
276 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 277 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
277 278
278 if not hasattr(self, 'template'): 279 if not hasattr(self, 'template'):
279 # this won't work 280 # this won't work
368 urlParams[k] = v 369 urlParams[k] = v
369 370
370 # FIXME: does this belong here? 371 # FIXME: does this belong here?
371 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 372 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
372 urlParams["mode"] = "imagepath" 373 urlParams["mode"] = "imagepath"
373 urlParams["url"] = getParentDir(urlParams["url"]) 374 urlParams["url"] = getParentPath(urlParams["url"])
374 375
375 # quote values and assemble into query string (not escaping '/') 376 # quote values and assemble into query string (not escaping '/')
376 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) 377 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()])
377 #ps = urllib.urlencode(urlParams) 378 #ps = urllib.urlencode(urlParams)
378 if baseUrl is None: 379 if baseUrl is None:
436 """gibt param von dlInfo aus""" 437 """gibt param von dlInfo aus"""
437 if docinfo is None: 438 if docinfo is None:
438 docinfo = {} 439 docinfo = {}
439 440
440 for x in range(cut): 441 for x in range(cut):
441 path=getParentDir(path) 442 path=getParentPath(path)
442 443
443 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 444 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
444 445
445 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) 446 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
446 447
534 if docinfo is None: 535 if docinfo is None:
535 docinfo = {} 536 docinfo = {}
536 537
537 if dom is None: 538 if dom is None:
538 for x in range(cut): 539 for x in range(cut):
539 path=getParentDir(path) 540 path=getParentPath(path)
540 dom = self.getDomFromIndexMeta(path) 541 dom = self.getDomFromIndexMeta(path)
541 542
542 acc = dom.find(".//access-conditions/access") 543 acc = dom.find(".//access-conditions/access")
543 if acc is not None: 544 if acc is not None:
544 acctype = acc.get('type') 545 acctype = acc.get('type')
589 if docinfo is None: 590 if docinfo is None:
590 docinfo = {} 591 docinfo = {}
591 592
592 if dom is None: 593 if dom is None:
593 for x in range(cut): 594 for x in range(cut):
594 path=getParentDir(path) 595 path=getParentPath(path)
595 dom = self.getDomFromIndexMeta(path) 596 dom = self.getDomFromIndexMeta(path)
596 597
597 docinfo['name']=getText(dom.find("name")) 598 docinfo['name']=getText(dom.find("name"))
598 logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) 599 logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
599 return docinfo 600 return docinfo
721 docinfo['imagePath'] = path 722 docinfo['imagePath'] = path
722 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) 723 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
723 724
724 pathorig=path 725 pathorig=path
725 for x in range(cut): 726 for x in range(cut):
726 path=getParentDir(path) 727 path=getParentPath(path)
727 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) 728 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
728 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path 729 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
729 docinfo['imageURL'] = imageUrl 730 docinfo['imageURL'] = imageUrl
730 731
731 #TODO: use getDocinfoFromIndexMeta 732 #TODO: use getDocinfoFromIndexMeta
733 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 734 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
734 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 735 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
735 return docinfo 736 return docinfo
736 737
737 738
738 def getDocinfo(self, mode, url): 739 def OLDgetDocinfo(self, mode, url):
739 """returns docinfo depending on mode""" 740 """returns docinfo depending on mode"""
740 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) 741 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
741 # look for cached docinfo in session 742 # look for cached docinfo in session
742 if self.REQUEST.SESSION.has_key('docinfo'): 743 if self.REQUEST.SESSION.has_key('docinfo'):
743 docinfo = self.REQUEST.SESSION['docinfo'] 744 docinfo = self.REQUEST.SESSION['docinfo']
766 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) 767 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
767 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 768 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
768 # store in session 769 # store in session
769 self.REQUEST.SESSION['docinfo'] = docinfo 770 self.REQUEST.SESSION['docinfo'] = docinfo
770 return docinfo 771 return docinfo
771 772
773
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, interpreted according to mode.

    A docinfo cached in the session is reused when its 'mode' and 'url'
    match the arguments. Otherwise a new docinfo is assembled from the
    index.meta DOM (when one is found) and from digilib, and is stored
    in the session.

    @param mode: one of 'texttool', 'imagepath', 'filepath'
    @param url: document dir or index.meta (texttool), folder with images
        (imagepath) or image file (filepath)
    @raise IOError: mode is 'texttool' and no index.meta DOM was found
    @raise ValueError: mode is none of the known modes
    """
    logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url))
    session = self.REQUEST.SESSION
    # reuse the cached docinfo if it is still current
    if session.has_key('docinfo'):
        cached = session['docinfo']
        if cached is not None and cached.get('mode', None) == mode and cached.get('url', None) == url:
            logging.debug("getDocinfo: docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo including the viewer's own url
    docinfo = {'mode': mode, 'url': url, 'viewerUrl': self.getDocumentViewerURL()}

    # locate the document and get the index.meta DOM
    if mode == "texttool":
        # url points to document dir or index.meta; index.meta is mandatory
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode in ("imagepath", "filepath"):
        # index.meta is optional here and assumed one path segment up from
        # an image folder (imagepath), two up from an image file (filepath)
        levels = 1 if mode == "imagepath" else 2
        docUrl = getParentPath(url, levels)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # NOTE(review): nesting reconstructed from an unindented listing --
    # confirm that only the four readers below depend on metaDom
    if metaDom is not None:
        service = self.metadataService
        # resource, texttool, bib and access info, read and applied in this order
        readers = (
            (service.getResourceData, self.getDocinfoFromResource),
            (service.getTexttoolData, self.getDocinfoFromTexttool),
            (service.getBibData, self.getDocinfoFromBib),
            (service.getAccessData, self.getDocinfoFromAccess),
        )
        for readData, applyData in readers:
            data = readData(dom=metaDom)
            if data:
                docinfo = applyData(docinfo, data)

    if mode != 'texttool':
        # the url itself is the image path, overriding any path from texttool
        docinfo['imagePath'] = url

    # image URL and number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
853
def getDocinfoFromResource(self, docinfo, resource):
    """Read the contents of the resource element into docinfo.

    Sets docinfo['documentName'] and docinfo['documentPath'].

    @param docinfo: docinfo dict to update
    @param resource: dict-like resource data; the keys 'name' and
        'archive-path' are read, both optional
    @return: the updated docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path: make it absolute ...
        if not docPath.startswith('/'):
            docPath = '/' + docPath

        # ... and make sure it ends with the document name
        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # no archive-path: use the document URL as path, unless it is a web URL.
        # The key is written as 'documentUrl'; the old spelling 'documentURL'
        # is still accepted as a fallback.
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith(('http:', 'https:')):
            docPath = docUrl

    docinfo['documentPath'] = docPath
    return docinfo
875
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    May set docinfo['imagePath'], ['textURL'], ['textURLPath'] and
    ['presentationPath']. Entries that are relative to the document are
    only set when docinfo has a non-empty 'documentPath'.

    @param docinfo: docinfo dict to update
    @param texttool: dict-like texttool data; the keys 'image', 'text',
        'text-url-path' and 'presentation' are read, all optional
    @return: the updated docinfo
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        imagePath = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = imagePath.replace('/mpiwg/online', '', 1)

    # old style text URL
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL: resolve relative to the document path
            oldTextUrl = os.path.join(docPath, oldTextUrl)
            # NOTE(review): nesting reconstructed from an unindented listing --
            # confirm this fix belongs only to the no-scheme branch
            # fix URLs starting with /mpiwg/online
            oldTextUrl = oldTextUrl.replace('/mpiwg/online', '', 1)

        docinfo['textURL'] = oldTextUrl

    # new style text-url-path, stored as it is
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # old presentation stuff, relative to the document path
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        docinfo['presentationPath'] = os.path.join(docPath, presentation)

    return docinfo
911
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib data as docinfo['bib'], its '@type' entry as
    docinfo['bibType'] and the 'creator', 'title' and 'date' entries of
    the DC-mapped data under the same keys (None where missing).

    @param docinfo: docinfo dict to update
    @param bib: dict-like bib data
    @return: the updated docinfo
    """
    # keep all raw bib fields
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field, None)

    return docinfo
924
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type. For the types 'group'
    and 'institution' the lower-cased 'name' entry is used instead.
    Best effort: if the expected entries are missing, docinfo is
    returned unchanged.

    @param docinfo: docinfo dict to update
    @param acc: dict-like access data; acc['@attr']['type'] and
        acc['name'] are read
    @return: the updated docinfo
    """
    #TODO: also read resource type
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ('group', 'institution'):
                # named access: the group or institution name is the access type
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, IndexError, TypeError, AttributeError):
        # incomplete access data is not an error, docinfo stays as it is
        logging.debug("getDocinfoFromAccess: no usable access type in %s"%repr(acc))

    return docinfo
941
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of images in path from digilib into docinfo.

    Requests dirInfo-xml.jsp below self.digilibBaseUrl for path and stores
    the content of its "size" element as int in docinfo['numPages'] (0 if
    it is empty or not a number). If no data is returned, docinfo is left
    unchanged.

    @param docinfo: docinfo dict to update
    @param path: image directory path, passed to digilib as "fn" parameter
    @return: the updated docinfo
    """
    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
    # fetch data
    txt = getHttpData(infoUrl)
    if not txt:
        logging.error("Unable to get dir-info from %s"%(infoUrl))
        return docinfo

    dom = ET.fromstring(txt)
    size = getText(dom.find("size"))
    logging.debug("getDocinfoFromDigilib: size=%s"%size)
    try:
        numPages = int(size) if size else 0
    except (TypeError, ValueError):
        # a malformed answer must not break the whole viewer page
        logging.error("getDocinfoFromDigilib: invalid size %r from %s"%(size, infoUrl))
        numPages = 0

    docinfo['numPages'] = numPages

    # TODO: produce and keep list of image names and numbers
    return docinfo
960
961
772 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): 962 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
773 """returns pageinfo with the given parameters""" 963 """returns pageinfo with the given parameters"""
774 pageinfo = {} 964 pageinfo = {}
775 current = getInt(current) 965 current = getInt(current)
776 966