comparison documentViewer.py @ 454:73e3273c7624 elementtree

more work
author casties
date Fri, 15 Jul 2011 11:02:26 +0200
parents beb7ccb92564
children 0a53fea83df7
comparison
equal deleted inserted replaced
453:beb7ccb92564 454:73e3273c7624
497 """gibt param von dlInfo aus""" 497 """gibt param von dlInfo aus"""
498 if docinfo is None: 498 if docinfo is None:
499 docinfo = {} 499 docinfo = {}
500 500
501 for x in range(cut): 501 for x in range(cut):
502 502 path=getParentDir(path)
503 path=getParentDir(path)
504 503
505 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 504 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
506 505
507 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) 506 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
508 507
628 627
629 docinfo['indexMetaPath']=self.getIndexMetaPath(path); 628 docinfo['indexMetaPath']=self.getIndexMetaPath(path);
630 629
631 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 630 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
632 # put in all raw bib fields as dict "bib" 631 # put in all raw bib fields as dict "bib"
633 bib = dom.find(".//bib/*") 632 bib = dom.find(".//bib")
634 #bib = dom.xpath("//bib/*") 633 #bib = dom.xpath("//bib/*")
635 if bib and len(bib)>0: 634 if bib is not None:
636 bibinfo = {} 635 bibinfo = {}
637 for e in bib: 636 for e in bib:
638 bibinfo[e.localName] = getTextFromNode(e) 637 bibinfo[e.tag] = getText(e)
638
639 docinfo['bib'] = bibinfo 639 docinfo['bib'] = bibinfo
640 640
641 # extract some fields (author, title, year) according to their mapping 641 # extract some fields (author, title, year) according to their mapping
642 metaData=self.metadata.main.meta.bib 642 metaData=self.metadata.main.meta.bib
643 bib = dom.find(".//bib")
644 bibtype=bib.get("type") 643 bibtype=bib.get("type")
645 #bibtype=dom.xpath("//bib/@type") 644 #bibtype=dom.xpath("//bib/@type")
646 if not bibtype: 645 if not bibtype:
647 bibtype="generic" 646 bibtype="generic"
648 647
709 # docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0]) 708 # docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0])
710 # except: 709 # except:
711 # docinfo['isbn_issn']='' 710 # docinfo['isbn_issn']=''
712 return docinfo 711 return docinfo
713 712
714 713
714 # TODO: is this needed?
715 def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 715 def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
716 """gets name info from the index.meta file at path or given by dom""" 716 """gets name info from the index.meta file at path or given by dom"""
717 if docinfo is None: 717 if docinfo is None:
718 docinfo = {} 718 docinfo = {}
719 719
737 dom = self.getDomFromIndexMeta(url) 737 dom = self.getDomFromIndexMeta(url)
738 738
739 archivePath = None 739 archivePath = None
740 archiveName = None 740 archiveName = None
741 741
742 archiveName = getTextFromNode(dom.find("name")) 742 archiveName = getText(dom.find("name"))
743 if not archiveName: 743 if not archiveName:
744 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) 744 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
745 745
746 archivePath = getTextFromNode(dom.find("archive-path")) 746 archivePath = getText(dom.find("archive-path"))
747 if archivePath: 747 if archivePath:
748 # clean up archive path 748 # clean up archive path
749 if archivePath[0] != '/': 749 if archivePath[0] != '/':
750 archivePath = '/' + archivePath 750 archivePath = '/' + archivePath
751 if archiveName and (not archivePath.endswith(archiveName)): 751 if archiveName and (not archivePath.endswith(archiveName)):
805 #docinfo = self.getNumTextPages(docinfo) 805 #docinfo = self.getNumTextPages(docinfo)
806 806
807 807
808 presentationUrl = getText(dom.find(".//texttool/presentation")) 808 presentationUrl = getText(dom.find(".//texttool/presentation"))
809 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 809 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag
810 # TODO: is this needed here?
810 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) 811 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
811 812
812 813
813 if presentationUrl: # ueberschreibe diese durch presentation informationen 814 if presentationUrl: # ueberschreibe diese durch presentation informationen
814 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 815 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
828 829
829 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 830 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
830 """gets the bibliographical information from the preseantion entry in texttools 831 """gets the bibliographical information from the preseantion entry in texttools
831 """ 832 """
832 dom=self.getPresentationInfoXML(url) 833 dom=self.getPresentationInfoXML(url)
833 try: 834 docinfo['author']=getText(dom.find(".//author"))
834 docinfo['author']=getText(dom.find(".//author")) 835 docinfo['title']=getText(dom.find(".//title"))
835 except: 836 docinfo['year']=getText(dom.find(".//date"))
836 pass
837 try:
838 docinfo['title']=getText(dom.find(".//title"))
839 except:
840 pass
841 try:
842 docinfo['year']=getText(dom.find(".//date"))
843 except:
844 pass
845 return docinfo 837 return docinfo
846 838
847 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): 839 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
848 """path ist the path to the images it assumes that the index.meta file is one level higher.""" 840 """path ist the path to the images it assumes that the index.meta file is one level higher."""
849 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) 841 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))