Mercurial > hg > documentViewer
comparison documentViewer.py @ 454:73e3273c7624 elementtree
more work
author | casties |
---|---|
date | Fri, 15 Jul 2011 11:02:26 +0200 |
parents | beb7ccb92564 |
children | 0a53fea83df7 |
comparison legend (colour key of the original side-by-side view):
equal
deleted
inserted
replaced
453:beb7ccb92564 | 454:73e3273c7624 |
---|---|
497 """gibt param von dlInfo aus""" | 497 """gibt param von dlInfo aus""" |
498 if docinfo is None: | 498 if docinfo is None: |
499 docinfo = {} | 499 docinfo = {} |
500 | 500 |
501 for x in range(cut): | 501 for x in range(cut): |
502 | 502 path=getParentDir(path) |
503 path=getParentDir(path) | |
504 | 503 |
505 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path | 504 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path |
506 | 505 |
507 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) | 506 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) |
508 | 507 |
628 | 627 |
629 docinfo['indexMetaPath']=self.getIndexMetaPath(path); | 628 docinfo['indexMetaPath']=self.getIndexMetaPath(path); |
630 | 629 |
631 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) | 630 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) |
632 # put in all raw bib fields as dict "bib" | 631 # put in all raw bib fields as dict "bib" |
633 bib = dom.find(".//bib/*") | 632 bib = dom.find(".//bib") |
634 #bib = dom.xpath("//bib/*") | 633 #bib = dom.xpath("//bib/*") |
635 if bib and len(bib)>0: | 634 if bib is not None: |
636 bibinfo = {} | 635 bibinfo = {} |
637 for e in bib: | 636 for e in bib: |
638 bibinfo[e.localName] = getTextFromNode(e) | 637 bibinfo[e.tag] = getText(e) |
638 | |
639 docinfo['bib'] = bibinfo | 639 docinfo['bib'] = bibinfo |
640 | 640 |
641 # extract some fields (author, title, year) according to their mapping | 641 # extract some fields (author, title, year) according to their mapping |
642 metaData=self.metadata.main.meta.bib | 642 metaData=self.metadata.main.meta.bib |
643 bib = dom.find(".//bib") | |
644 bibtype=bib.get("type") | 643 bibtype=bib.get("type") |
645 #bibtype=dom.xpath("//bib/@type") | 644 #bibtype=dom.xpath("//bib/@type") |
646 if not bibtype: | 645 if not bibtype: |
647 bibtype="generic" | 646 bibtype="generic" |
648 | 647 |
709 # docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0]) | 708 # docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0]) |
710 # except: | 709 # except: |
711 # docinfo['isbn_issn']='' | 710 # docinfo['isbn_issn']='' |
712 return docinfo | 711 return docinfo |
713 | 712 |
714 | 713 |
714 # TODO: is this needed? | |
715 def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | 715 def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): |
716 """gets name info from the index.meta file at path or given by dom""" | 716 """gets name info from the index.meta file at path or given by dom""" |
717 if docinfo is None: | 717 if docinfo is None: |
718 docinfo = {} | 718 docinfo = {} |
719 | 719 |
737 dom = self.getDomFromIndexMeta(url) | 737 dom = self.getDomFromIndexMeta(url) |
738 | 738 |
739 archivePath = None | 739 archivePath = None |
740 archiveName = None | 740 archiveName = None |
741 | 741 |
742 archiveName = getTextFromNode(dom.find("name")) | 742 archiveName = getText(dom.find("name")) |
743 if not archiveName: | 743 if not archiveName: |
744 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) | 744 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) |
745 | 745 |
746 archivePath = getTextFromNode(dom.find("archive-path")) | 746 archivePath = getText(dom.find("archive-path")) |
747 if archivePath: | 747 if archivePath: |
748 # clean up archive path | 748 # clean up archive path |
749 if archivePath[0] != '/': | 749 if archivePath[0] != '/': |
750 archivePath = '/' + archivePath | 750 archivePath = '/' + archivePath |
751 if archiveName and (not archivePath.endswith(archiveName)): | 751 if archiveName and (not archivePath.endswith(archiveName)): |
805 #docinfo = self.getNumTextPages(docinfo) | 805 #docinfo = self.getNumTextPages(docinfo) |
806 | 806 |
807 | 807 |
808 presentationUrl = getText(dom.find(".//texttool/presentation")) | 808 presentationUrl = getText(dom.find(".//texttool/presentation")) |
809 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag | 809 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag |
810 # TODO: is this needed here? | |
810 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) | 811 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) |
811 | 812 |
812 | 813 |
813 if presentationUrl: # ueberschreibe diese durch presentation informationen | 814 if presentationUrl: # ueberschreibe diese durch presentation informationen |
814 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten | 815 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten |
828 | 829 |
829 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): | 830 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): |
830 """gets the bibliographical information from the preseantion entry in texttools | 831 """gets the bibliographical information from the preseantion entry in texttools |
831 """ | 832 """ |
832 dom=self.getPresentationInfoXML(url) | 833 dom=self.getPresentationInfoXML(url) |
833 try: | 834 docinfo['author']=getText(dom.find(".//author")) |
834 docinfo['author']=getText(dom.find(".//author")) | 835 docinfo['title']=getText(dom.find(".//title")) |
835 except: | 836 docinfo['year']=getText(dom.find(".//date")) |
836 pass | |
837 try: | |
838 docinfo['title']=getText(dom.find(".//title")) | |
839 except: | |
840 pass | |
841 try: | |
842 docinfo['year']=getText(dom.find(".//date")) | |
843 except: | |
844 pass | |
845 return docinfo | 837 return docinfo |
846 | 838 |
847 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): | 839 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): |
848 """path ist the path to the images it assumes that the index.meta file is one level higher.""" | 840 """path ist the path to the images it assumes that the index.meta file is one level higher.""" |
849 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) | 841 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) |