Mercurial > hg > documentViewer
comparison documentViewer.py @ 464:19bd41d95f62 elementtree
first version with new getdocinfo
author | casties |
---|---|
date | Fri, 29 Jul 2011 18:27:24 +0200 |
parents | 89ad327b4bbd |
children | 224aad394350 |
comparison
equal
deleted
inserted
replaced
463:89ad327b4bbd | 464:19bd41d95f62 |
---|---|
96 bt['isIEMac'] = bt['isIE'] and bt['isMac'] | 96 bt['isIEMac'] = bt['isIE'] and bt['isMac'] |
97 bt['staticHTML'] = False | 97 bt['staticHTML'] = False |
98 | 98 |
99 return bt | 99 return bt |
100 | 100 |
101 def getParentDir(path): | 101 def getParentPath(path, cnt=1): |
102 """returns pathname shortened by one""" | 102 """returns pathname shortened by cnt""" |
103 return '/'.join(path.split('/')[0:-1]) | 103 # make sure path doesn't end with / |
104 | 104 path = path.rstrip('/') |
105 # split by /, shorten, and reassemble | |
106 return '/'.join(path.split('/')[0:-cnt]) | |
107 | |
105 | 108 |
106 ## | 109 ## |
107 ## documentViewer class | 110 ## documentViewer class |
108 ## | 111 ## |
109 class documentViewer(Folder): | 112 class documentViewer(Folder): |
261 | 264 |
262 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) | 265 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
263 | 266 |
264 security.declareProtected('View','index_html') | 267 security.declareProtected('View','index_html') |
265 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): | 268 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): |
266 ''' | 269 """ |
267 view it | 270 view it |
268 @param mode: defines how to access the document behind url | 271 @param mode: defines how to access the document behind url |
269 @param url: url which contains display information | 272 @param url: url which contains display information |
270 @param viewMode: if images display images, if text display text, default is auto (text,images or auto) | 273 @param viewMode: if images display images, if text display text, default is auto (text,images or auto) |
271 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) | 274 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) |
272 @param characterNormalization type of text display (reg, norm, none) | 275 """ |
273 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) | |
274 ''' | |
275 | 276 |
276 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) | 277 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
277 | 278 |
278 if not hasattr(self, 'template'): | 279 if not hasattr(self, 'template'): |
279 # this won't work | 280 # this won't work |
368 urlParams[k] = v | 369 urlParams[k] = v |
369 | 370 |
370 # FIXME: does this belong here? | 371 # FIXME: does this belong here? |
371 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath | 372 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath |
372 urlParams["mode"] = "imagepath" | 373 urlParams["mode"] = "imagepath" |
373 urlParams["url"] = getParentDir(urlParams["url"]) | 374 urlParams["url"] = getParentPath(urlParams["url"]) |
374 | 375 |
375 # quote values and assemble into query string (not escaping '/') | 376 # quote values and assemble into query string (not escaping '/') |
376 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) | 377 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) |
377 #ps = urllib.urlencode(urlParams) | 378 #ps = urllib.urlencode(urlParams) |
378 if baseUrl is None: | 379 if baseUrl is None: |
436 """gibt param von dlInfo aus""" | 437 """gibt param von dlInfo aus""" |
437 if docinfo is None: | 438 if docinfo is None: |
438 docinfo = {} | 439 docinfo = {} |
439 | 440 |
440 for x in range(cut): | 441 for x in range(cut): |
441 path=getParentDir(path) | 442 path=getParentPath(path) |
442 | 443 |
443 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path | 444 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path |
444 | 445 |
445 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) | 446 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) |
446 | 447 |
534 if docinfo is None: | 535 if docinfo is None: |
535 docinfo = {} | 536 docinfo = {} |
536 | 537 |
537 if dom is None: | 538 if dom is None: |
538 for x in range(cut): | 539 for x in range(cut): |
539 path=getParentDir(path) | 540 path=getParentPath(path) |
540 dom = self.getDomFromIndexMeta(path) | 541 dom = self.getDomFromIndexMeta(path) |
541 | 542 |
542 acc = dom.find(".//access-conditions/access") | 543 acc = dom.find(".//access-conditions/access") |
543 if acc is not None: | 544 if acc is not None: |
544 acctype = acc.get('type') | 545 acctype = acc.get('type') |
589 if docinfo is None: | 590 if docinfo is None: |
590 docinfo = {} | 591 docinfo = {} |
591 | 592 |
592 if dom is None: | 593 if dom is None: |
593 for x in range(cut): | 594 for x in range(cut): |
594 path=getParentDir(path) | 595 path=getParentPath(path) |
595 dom = self.getDomFromIndexMeta(path) | 596 dom = self.getDomFromIndexMeta(path) |
596 | 597 |
597 docinfo['name']=getText(dom.find("name")) | 598 docinfo['name']=getText(dom.find("name")) |
598 logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) | 599 logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) |
599 return docinfo | 600 return docinfo |
721 docinfo['imagePath'] = path | 722 docinfo['imagePath'] = path |
722 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) | 723 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) |
723 | 724 |
724 pathorig=path | 725 pathorig=path |
725 for x in range(cut): | 726 for x in range(cut): |
726 path=getParentDir(path) | 727 path=getParentPath(path) |
727 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) | 728 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) |
728 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path | 729 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path |
729 docinfo['imageURL'] = imageUrl | 730 docinfo['imageURL'] = imageUrl |
730 | 731 |
731 #TODO: use getDocinfoFromIndexMeta | 732 #TODO: use getDocinfoFromIndexMeta |
733 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) | 734 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) |
734 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) | 735 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) |
735 return docinfo | 736 return docinfo |
736 | 737 |
737 | 738 |
738 def getDocinfo(self, mode, url): | 739 def OLDgetDocinfo(self, mode, url): |
739 """returns docinfo depending on mode""" | 740 """returns docinfo depending on mode""" |
740 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) | 741 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) |
741 # look for cached docinfo in session | 742 # look for cached docinfo in session |
742 if self.REQUEST.SESSION.has_key('docinfo'): | 743 if self.REQUEST.SESSION.has_key('docinfo'): |
743 docinfo = self.REQUEST.SESSION['docinfo'] | 744 docinfo = self.REQUEST.SESSION['docinfo'] |
766 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) | 767 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) |
767 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) | 768 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) |
768 # store in session | 769 # store in session |
769 self.REQUEST.SESSION['docinfo'] = docinfo | 770 self.REQUEST.SESSION['docinfo'] = docinfo |
770 return docinfo | 771 return docinfo |
771 | 772 |
773 | |
774 def getDocinfo(self, mode, url): | |
775 """returns docinfo depending on mode""" | |
776 logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) | |
777 # look for cached docinfo in session | |
778 if self.REQUEST.SESSION.has_key('docinfo'): | |
779 docinfo = self.REQUEST.SESSION['docinfo'] | |
780 # check if it's still current | |
781 if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url: | |
782 logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys()) | |
783 return docinfo | |
784 | |
785 # new docinfo | |
786 docinfo = {'mode': mode, 'url': url} | |
787 # add self url | |
788 docinfo['viewerUrl'] = self.getDocumentViewerURL() | |
789 # get index.meta DOM | |
790 docUrl = None | |
791 metaDom = None | |
792 if mode=="texttool": | |
793 # url points to document dir or index.meta | |
794 metaDom = self.metadataService.getDomFromPathOrUrl(url) | |
795 docUrl = url.replace('/index.meta', '') | |
796 if metaDom is None: | |
797 raise IOError("Unable to find index.meta for mode=texttool!") | |
798 | |
799 elif mode=="imagepath": | |
800 # url points to folder with images, index.meta optional | |
801 # assume index.meta in parent dir | |
802 docUrl = getParentPath(url) | |
803 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) | |
804 | |
805 elif mode=="filepath": | |
806 # url points to image file, index.meta optional | |
807 # assume index.meta is two path segments up | |
808 docUrl = getParentPath(url, 2) | |
809 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) | |
810 | |
811 else: | |
812 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) | |
813 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) | |
814 | |
815 docinfo['documentUrl'] = docUrl | |
816 # process index.meta contents | |
817 if metaDom is not None: | |
818 # document directory name and path | |
819 resource = self.metadataService.getResourceData(dom=metaDom) | |
820 if resource: | |
821 docinfo = self.getDocinfoFromResource(docinfo, resource) | |
822 | |
823 # texttool info | |
824 texttool = self.metadataService.getTexttoolData(dom=metaDom) | |
825 if texttool: | |
826 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) | |
827 | |
828 # bib info | |
829 bib = self.metadataService.getBibData(dom=metaDom) | |
830 if bib: | |
831 docinfo = self.getDocinfoFromBib(docinfo, bib) | |
832 | |
833 # auth info | |
834 access = self.metadataService.getAccessData(dom=metaDom) | |
835 if access: | |
836 docinfo = self.getDocinfoFromAccess(docinfo, access) | |
837 | |
838 # image path | |
839 if mode != 'texttool': | |
840 # override image path from texttool | |
841 docinfo['imagePath'] = url | |
842 | |
843 # number of images from digilib | |
844 if docinfo.get('imagePath', None): | |
845 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] | |
846 docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) | |
847 | |
848 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) | |
849 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) | |
850 # store in session | |
851 self.REQUEST.SESSION['docinfo'] = docinfo | |
852 return docinfo | |
853 | |
854 def getDocinfoFromResource(self, docinfo, resource): | |
855 """reads contents of resource element into docinfo""" | |
856 docName = resource.get('name', None) | |
857 docinfo['documentName'] = docName | |
858 docPath = resource.get('archive-path', None) | |
859 if docPath: | |
860 # clean up document path | |
861 if docPath[0] != '/': | |
862 docPath = '/' + docPath | |
863 | |
864 if docName and (not docPath.endswith(docName)): | |
865 docPath += "/" + docName | |
866 | |
867 else: | |
868 # use docUrl as docPath | |
869 docUrl = docinfo['documentURL'] | |
870 if not docUrl.startswith('http:'): | |
871 docPath = docUrl | |
872 | |
873 docinfo['documentPath'] = docPath | |
874 return docinfo | |
875 | |
876 def getDocinfoFromTexttool(self, docinfo, texttool): | |
877 """reads contents of texttool element into docinfo""" | |
878 # image dir | |
879 imageDir = texttool.get('image', None) | |
880 docPath = docinfo.get('documentPath', None) | |
881 if imageDir and docPath: | |
882 #print "image: ", imageDir, " archivepath: ", archivePath | |
883 imageDir = os.path.join(docPath, imageDir) | |
884 imageDir = imageDir.replace('/mpiwg/online', '', 1) | |
885 docinfo['imagePath'] = imageDir | |
886 | |
887 # old style text URL | |
888 textUrl = texttool.get('text', None) | |
889 if textUrl and docPath: | |
890 if urlparse.urlparse(textUrl)[0] == "": #keine url | |
891 textUrl = os.path.join(docPath, textUrl) | |
892 # fix URLs starting with /mpiwg/online | |
893 textUrl = textUrl.replace('/mpiwg/online', '', 1) | |
894 | |
895 docinfo['textURL'] = textUrl | |
896 | |
897 # new style text-url-path | |
898 textUrl = texttool.get('text-url-path', None) | |
899 if textUrl: | |
900 docinfo['textURLPath'] = textUrl | |
901 #TODO: ugly: | |
902 #textUrlkurz = string.split(textUrl, ".")[0] | |
903 #docinfo['textURLPathkurz'] = textUrlkurz | |
904 | |
905 # old presentation stuff | |
906 presentation = texttool.get('presentation', None) | |
907 if presentation and docPath: | |
908 docinfo['presentationPath'] = os.path.join(docPath, presentation) | |
909 | |
910 return docinfo | |
911 | |
912 def getDocinfoFromBib(self, docinfo, bib): | |
913 """reads contents of bib element into docinfo""" | |
914 # put all raw bib fields in dict "bib" | |
915 docinfo['bib'] = bib | |
916 bibtype = bib.get('@type', None) | |
917 docinfo['bibType'] = bibtype | |
918 # also store DC metadata for convenience | |
919 dc = self.metadataService.getDCMappedData(bib) | |
920 docinfo['creator'] = dc.get('creator',None) | |
921 docinfo['title'] = dc.get('title',None) | |
922 docinfo['date'] = dc.get('date',None) | |
923 return docinfo | |
924 | |
925 def getDocinfoFromAccess(self, docinfo, acc): | |
926 """reads contents of access element into docinfo""" | |
927 #TODO: also read resource type | |
928 try: | |
929 acctype = accc['@attr']['type'] | |
930 if acctype: | |
931 access=acctype | |
932 if access in ['group', 'institution']: | |
933 access = acc['name'].lower() | |
934 | |
935 docinfo['accessType'] = access | |
936 | |
937 except: | |
938 pass | |
939 | |
940 return docinfo | |
941 | |
942 def getDocinfoFromDigilib(self, docinfo, path): | |
943 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path | |
944 # fetch data | |
945 txt = getHttpData(infoUrl) | |
946 if not txt: | |
947 logging.error("Unable to get dir-info from %s"%(infoUrl)) | |
948 return docinfo | |
949 | |
950 dom = ET.fromstring(txt) | |
951 size = getText(dom.find("size")) | |
952 logging.debug("getDocinfoFromDigilib: size=%s"%size) | |
953 if size: | |
954 docinfo['numPages'] = int(size) | |
955 else: | |
956 docinfo['numPages'] = 0 | |
957 | |
958 # TODO: produce and keep list of image names and numbers | |
959 return docinfo | |
960 | |
961 | |
772 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): | 962 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
773 """returns pageinfo with the given parameters""" | 963 """returns pageinfo with the given parameters""" |
774 pageinfo = {} | 964 pageinfo = {} |
775 current = getInt(current) | 965 current = getInt(current) |
776 | 966 |