Changeset 464:19bd41d95f62 in documentViewer
- Timestamp:
- Jul 29, 2011, 4:27:24 PM (14 years ago)
- Branch:
- elementtree
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
SrvTxtUtils.py
r458 r464 23 23 text = node.text or "" 24 24 for e in node: 25 text += gettext(e) 25 text += getText(e) 26 26 if e.tail: 27 27 text += e.tail -
documentViewer.py
r463 r464 99 99 return bt 100 100 101 def getParentDir(path): 102 """returns pathname shortened by one""" 103 return '/'.join(path.split('/')[0:-1]) 104 101 def getParentPath(path, cnt=1): 102 """returns pathname shortened by cnt""" 103 # make sure path doesn't end with / 104 path = path.rstrip('/') 105 # split by /, shorten, and reassemble 106 return '/'.join(path.split('/')[0:-cnt]) 107 105 108 106 109 ## … … 264 267 security.declareProtected('View','index_html') 265 268 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): 266 '''269 """ 267 270 view it 268 271 @param mode: defines how to access the document behind url … … 270 273 @param viewMode: if images display images, if text display text, default is auto (text,images or auto) 271 274 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 272 @param characterNormalization type of text display (reg, norm, none) 273 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) 274 ''' 275 """ 275 276 276 277 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) … … 371 372 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 372 373 urlParams["mode"] = "imagepath" 373 urlParams["url"] = getParent Dir(urlParams["url"])374 urlParams["url"] = getParentPath(urlParams["url"]) 374 375 375 376 # quote values and assemble into query string (not escaping '/') … … 439 440 440 441 for x in range(cut): 441 path=getParent Dir(path)442 path=getParentPath(path) 442 443 443 444 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path … … 537 538 if dom is None: 538 539 for x in range(cut): 539 path=getParent Dir(path)540 path=getParentPath(path) 540 541 dom = self.getDomFromIndexMeta(path) 541 542 … … 592 593 if dom is None: 593 594 for x in range(cut): 594 
path=getParentDir(path) 595 path=getParentPath(path) 595 596 dom = self.getDomFromIndexMeta(path) 596 597 … … 724 725 pathorig=path 725 726 for x in range(cut): 726 path=getParentDir(path) 727 path=getParentPath(path) 727 728 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) 728 729 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path … … 736 737 737 738 738 def getDocinfo(self, mode, url): 739 def OLDgetDocinfo(self, mode, url): 739 740 """returns docinfo depending on mode""" 740 741 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) … … 769 770 self.REQUEST.SESSION['docinfo'] = docinfo 770 771 return docinfo 771 772 773 774 def getDocinfo(self, mode, url): 775 """returns docinfo depending on mode""" 776 logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) 777 # look for cached docinfo in session 778 if self.REQUEST.SESSION.has_key('docinfo'): 779 docinfo = self.REQUEST.SESSION['docinfo'] 780 # check if its still current 781 if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url: 782 logging.debug("getDocinfo: docinfo in session. 
keys=%s"%docinfo.keys()) 783 return docinfo 784 785 # new docinfo 786 docinfo = {'mode': mode, 'url': url} 787 # add self url 788 docinfo['viewerUrl'] = self.getDocumentViewerURL() 789 # get index.meta DOM 790 docUrl = None 791 metaDom = None 792 if mode=="texttool": 793 # url points to document dir or index.meta 794 metaDom = self.metadataService.getDomFromPathOrUrl(url) 795 docUrl = url.replace('/index.meta', '') 796 if metaDom is None: 797 raise IOError("Unable to find index.meta for mode=texttool!") 798 799 elif mode=="imagepath": 800 # url points to folder with images, index.meta optional 801 # asssume index.meta in parent dir 802 docUrl = getParentPath(url) 803 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 804 805 elif mode=="filepath": 806 # url points to image file, index.meta optional 807 # asssume index.meta is two path segments up 808 docUrl = getParentPath(url, 2) 809 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 810 811 else: 812 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 813 raise ValueError("Unknown mode %s! 
Has to be one of 'texttool','imagepath','filepath'."%(mode)) 814 815 docinfo['documentUrl'] = docUrl 816 # process index.meta contents 817 if metaDom is not None: 818 # document directory name and path 819 resource = self.metadataService.getResourceData(dom=metaDom) 820 if resource: 821 docinfo = self.getDocinfoFromResource(docinfo, resource) 822 823 # texttool info 824 texttool = self.metadataService.getTexttoolData(dom=metaDom) 825 if texttool: 826 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) 827 828 # bib info 829 bib = self.metadataService.getBibData(dom=metaDom) 830 if bib: 831 docinfo = self.getDocinfoFromBib(docinfo, bib) 832 833 # auth info 834 access = self.metadataService.getAccessData(dom=metaDom) 835 if access: 836 docinfo = self.getDocinfoFromAccess(docinfo, access) 837 838 # image path 839 if mode != 'texttool': 840 # override image path from texttool 841 docinfo['imagePath'] = url 842 843 # number of images from digilib 844 if docinfo.get('imagePath', None): 845 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] 846 docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) 847 848 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) 849 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 850 # store in session 851 self.REQUEST.SESSION['docinfo'] = docinfo 852 return docinfo 853 854 def getDocinfoFromResource(self, docinfo, resource): 855 """reads contents of resource element into docinfo""" 856 docName = resource.get('name', None) 857 docinfo['documentName'] = docName 858 docPath = resource.get('archive-path', None) 859 if docPath: 860 # clean up document path 861 if docPath[0] != '/': 862 docPath = '/' + docPath 863 864 if docName and (not docPath.endswith(docName)): 865 docPath += "/" + docName 866 867 else: 868 # use docUrl as docPath 869 docUrl = docinfo['documentURL'] 870 if not docUrl.startswith('http:'): 871 docPath = docUrl 872 873 
docinfo['documentPath'] = docPath 874 return docinfo 875 876 def getDocinfoFromTexttool(self, docinfo, texttool): 877 """reads contents of texttool element into docinfo""" 878 # image dir 879 imageDir = texttool.get('image', None) 880 docPath = docinfo.get('documentPath', None) 881 if imageDir and docPath: 882 #print "image: ", imageDir, " archivepath: ", archivePath 883 imageDir = os.path.join(docPath, imageDir) 884 imageDir = imageDir.replace('/mpiwg/online', '', 1) 885 docinfo['imagePath'] = imageDir 886 887 # old style text URL 888 textUrl = texttool.get('text', None) 889 if textUrl and docPath: 890 if urlparse.urlparse(textUrl)[0] == "": #keine url 891 textUrl = os.path.join(docPath, textUrl) 892 # fix URLs starting with /mpiwg/online 893 textUrl = textUrl.replace('/mpiwg/online', '', 1) 894 895 docinfo['textURL'] = textUrl 896 897 # new style text-url-path 898 textUrl = texttool.get('text-url-path', None) 899 if textUrl: 900 docinfo['textURLPath'] = textUrl 901 #TODO: ugly: 902 #textUrlkurz = string.split(textUrl, ".")[0] 903 #docinfo['textURLPathkurz'] = textUrlkurz 904 905 # old presentation stuff 906 presentation = texttool.get('presentation', None) 907 if presentation and docPath: 908 docinfo['presentationPath'] = os.path.join(docPath, presentation) 909 910 return docinfo 911 912 def getDocinfoFromBib(self, docinfo, bib): 913 """reads contents of bib element into docinfo""" 914 # put all raw bib fields in dict "bib" 915 docinfo['bib'] = bib 916 bibtype = bib.get('@type', None) 917 docinfo['bibType'] = bibtype 918 # also store DC metadata for convenience 919 dc = self.metadataService.getDCMappedData(bib) 920 docinfo['creator'] = dc.get('creator',None) 921 docinfo['title'] = dc.get('title',None) 922 docinfo['date'] = dc.get('date',None) 923 return docinfo 924 925 def getDocinfoFromAccess(self, docinfo, acc): 926 """reads contents of access element into docinfo""" 927 #TODO: also read resource type 928 try: 929 acctype = accc['@attr']['type'] 930 if acctype: 
931 access=acctype 932 if access in ['group', 'institution']: 933 access = acc['name'].lower() 934 935 docinfo['accessType'] = access 936 937 except: 938 pass 939 940 return docinfo 941 942 def getDocinfoFromDigilib(self, docinfo, path): 943 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 944 # fetch data 945 txt = getHttpData(infoUrl) 946 if not txt: 947 logging.error("Unable to get dir-info from %s"%(infoUrl)) 948 return docinfo 949 950 dom = ET.fromstring(txt) 951 size = getText(dom.find("size")) 952 logging.debug("getDocinfoFromDigilib: size=%s"%size) 953 if size: 954 docinfo['numPages'] = int(size) 955 else: 956 docinfo['numPages'] = 0 957 958 # TODO: produce and keep list of image names and numbers 959 return docinfo 960 961 772 962 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): 773 963 """returns pageinfo with the given parameters"""
Note: See TracChangeset
for help on using the changeset viewer.