Mercurial > hg > documentViewer
comparison documentViewer.py @ 465:224aad394350 elementtree
really works with new getDocinfo
author | casties |
---|---|
date | Fri, 29 Jul 2011 20:36:04 +0200 |
parents | 19bd41d95f62 |
children | 1641be8dc6b5 |
comparison
equal
deleted
inserted
replaced
464:19bd41d95f62 | 465:224aad394350 |
---|---|
261 viewMode="text" | 261 viewMode="text" |
262 else: | 262 else: |
263 viewMode="images" | 263 viewMode="images" |
264 | 264 |
265 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) | 265 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
266 | |
266 | 267 |
267 security.declareProtected('View','index_html') | 268 security.declareProtected('View','index_html') |
268 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): | 269 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): |
269 """ | 270 """ |
270 view it | 271 view it |
291 docinfo = self.getToc(mode=tocMode, docinfo=docinfo) | 292 docinfo = self.getToc(mode=tocMode, docinfo=docinfo) |
292 | 293 |
293 # auto viewMode: text_dict if text else images | 294 # auto viewMode: text_dict if text else images |
294 if viewMode=="auto": | 295 if viewMode=="auto": |
295 if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): | 296 if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): |
296 #texturl gesetzt und textViewer konfiguriert | |
297 viewMode="text_dict" | 297 viewMode="text_dict" |
298 else: | 298 else: |
299 viewMode="images" | 299 viewMode="images" |
300 | 300 |
301 pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode) | 301 pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, tocMode=tocMode) |
387 """link to documentviewer with parameter param set to val""" | 387 """link to documentviewer with parameter param set to val""" |
388 return self.getLink(param, val, params, baseUrl, '&') | 388 return self.getLink(param, val, params, baseUrl, '&') |
389 | 389 |
390 def getInfo_xml(self,url,mode): | 390 def getInfo_xml(self,url,mode): |
391 """returns info about the document as XML""" | 391 """returns info about the document as XML""" |
392 | |
393 if not self.digilibBaseUrl: | 392 if not self.digilibBaseUrl: |
394 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" | 393 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" |
395 | 394 |
396 docinfo = self.getDocinfo(mode=mode,url=url) | 395 docinfo = self.getDocinfo(mode=mode,url=url) |
397 pt = getattr(self.template, 'info_xml') | 396 pt = getattr(self.template, 'info_xml') |
398 return pt(docinfo=docinfo) | 397 return pt(docinfo=docinfo) |
399 | 398 |
400 def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True): | |
401 """returns new option state""" | |
402 if not self.REQUEST.SESSION.has_key(optionName): | |
403 # not in session -- initial | |
404 opt = {'lastState': newState, 'state': initialState} | |
405 else: | |
406 opt = self.REQUEST.SESSION.get(optionName) | |
407 if opt['lastState'] != newState: | |
408 # state in session has changed -- toggle | |
409 opt['state'] = not opt['state'] | |
410 opt['lastState'] = newState | |
411 | |
412 self.REQUEST.SESSION[optionName] = opt | |
413 return opt['state'] | |
414 | |
415 def isAccessible(self, docinfo): | 399 def isAccessible(self, docinfo): |
416 """returns if access to the resource is granted""" | 400 """returns if access to the resource is granted""" |
417 access = docinfo.get('accessType', None) | 401 access = docinfo.get('accessType', None) |
418 logging.debug("documentViewer (accessOK) access type %s"%access) | 402 logging.debug("documentViewer (accessOK) access type %s"%access) |
419 if access is not None and access == 'free': | 403 if access == 'free': |
420 logging.debug("documentViewer (accessOK) access is free") | 404 logging.debug("documentViewer (accessOK) access is free") |
421 return True | 405 return True |
406 | |
422 elif access is None or access in self.authgroups: | 407 elif access is None or access in self.authgroups: |
423 # only local access -- only logged in users | 408 # only local access -- only logged in users |
424 user = getSecurityManager().getUser() | 409 user = getSecurityManager().getUser() |
425 logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) | 410 logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) |
426 if user is not None: | 411 if user is not None: |
430 return False | 415 return False |
431 | 416 |
432 logging.error("documentViewer (accessOK) unknown access type %s"%access) | 417 logging.error("documentViewer (accessOK) unknown access type %s"%access) |
433 return False | 418 return False |
434 | 419 |
435 | |
436 def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): | |
437 """gibt param von dlInfo aus""" | |
438 if docinfo is None: | |
439 docinfo = {} | |
440 | |
441 for x in range(cut): | |
442 path=getParentPath(path) | |
443 | |
444 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path | |
445 | |
446 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) | |
447 | |
448 txt = getHttpData(infoUrl) | |
449 if txt is None: | |
450 raise IOError("Unable to get dir-info from %s"%(infoUrl)) | |
451 | |
452 dom = ET.fromstring(txt) | |
453 #dom = Parse(txt) | |
454 size=getText(dom.find("size")) | |
455 #sizes=dom.xpath("//dir/size") | |
456 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size) | |
457 | |
458 if size: | |
459 docinfo['numPages'] = int(size) | |
460 else: | |
461 docinfo['numPages'] = 0 | |
462 | |
463 # TODO: produce and keep list of image names and numbers | |
464 | |
465 return docinfo | |
466 | |
467 def getIndexMetaPath(self,url): | |
468 """gib nur den Pfad zurueck""" | |
469 regexp = re.compile(r".*(experimental|permanent)/(.*)") | |
470 regpath = regexp.match(url) | |
471 if (regpath==None): | |
472 return "" | |
473 logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2)) | |
474 return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)) | |
475 | |
476 | |
477 | |
478 def getIndexMetaUrl(self,url): | |
479 """returns utr of index.meta document at url""" | |
480 | |
481 metaUrl = None | |
482 if url.startswith("http://"): | |
483 # real URL | |
484 metaUrl = url | |
485 else: | |
486 # online path | |
487 server=self.digilibBaseUrl+"/servlet/Texter?fn=" | |
488 metaUrl=server+url.replace("/mpiwg/online","") | |
489 if not metaUrl.endswith("index.meta"): | |
490 metaUrl += "/index.meta" | |
491 | |
492 return metaUrl | |
493 | |
494 def getDomFromIndexMeta(self, url): | |
495 """get dom from index meta""" | |
496 dom = None | |
497 metaUrl = self.getIndexMetaUrl(url) | |
498 | |
499 logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl) | |
500 txt=getHttpData(metaUrl) | |
501 if txt is None: | |
502 raise IOError("Unable to read index meta from %s"%(url)) | |
503 | |
504 dom = ET.fromstring(txt) | |
505 #dom = Parse(txt) | |
506 return dom | |
507 | |
508 def getPresentationInfoXML(self, url): | |
509 """returns dom of info.xml document at url""" | |
510 dom = None | |
511 metaUrl = None | |
512 if url.startswith("http://"): | |
513 # real URL | |
514 metaUrl = url | |
515 else: | |
516 # online path | |
517 server=self.digilibBaseUrl+"/servlet/Texter?fn=" | |
518 metaUrl=server+url.replace("/mpiwg/online","") | |
519 | |
520 txt=getHttpData(metaUrl) | |
521 if txt is None: | |
522 raise IOError("Unable to read infoXMLfrom %s"%(url)) | |
523 | |
524 dom = ET.fromstring(txt) | |
525 #dom = Parse(txt) | |
526 return dom | |
527 | |
528 | |
529 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | |
530 """gets authorization info from the index.meta file at path or given by dom""" | |
531 logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) | |
532 | |
533 access = None | |
534 | |
535 if docinfo is None: | |
536 docinfo = {} | |
537 | |
538 if dom is None: | |
539 for x in range(cut): | |
540 path=getParentPath(path) | |
541 dom = self.getDomFromIndexMeta(path) | |
542 | |
543 acc = dom.find(".//access-conditions/access") | |
544 if acc is not None: | |
545 acctype = acc.get('type') | |
546 #acctype = dom.xpath("//access-conditions/access/@type") | |
547 if acctype: | |
548 access=acctype | |
549 if access in ['group', 'institution']: | |
550 access = dom.find(".//access-conditions/access/name").text.lower() | |
551 | |
552 docinfo['accessType'] = access | |
553 return docinfo | |
554 | |
555 | |
556 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | |
557 """gets bibliographical info from the index.meta file at path or given by dom""" | |
558 logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) | |
559 | |
560 if docinfo is None: | |
561 docinfo = {} | |
562 | |
563 if dom is None: | |
564 for x in range(cut): | |
565 path=getParentDir(path) | |
566 dom = self.getDomFromIndexMeta(path) | |
567 | |
568 docinfo['indexMetaPath']=self.getIndexMetaPath(path); | |
569 | |
570 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) | |
571 if self.metadataService is not None: | |
572 # put all raw bib fields in dict "bib" | |
573 bib = self.metadataService.getBibData(dom=dom) | |
574 docinfo['bib'] = bib | |
575 bibtype = bib.get('@type', None) | |
576 docinfo['bib_type'] = bibtype | |
577 # also store DC metadata for convenience | |
578 dc = self.metadataService.getDCMappedData(bib) | |
579 docinfo['creator'] = dc.get('creator',None) | |
580 docinfo['title'] = dc.get('title',None) | |
581 docinfo['date'] = dc.get('date',None) | |
582 else: | |
583 logging.error("MetadataService not found!") | |
584 return docinfo | |
585 | |
586 | |
587 # TODO: is this needed? | |
588 def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): | |
589 """gets name info from the index.meta file at path or given by dom""" | |
590 if docinfo is None: | |
591 docinfo = {} | |
592 | |
593 if dom is None: | |
594 for x in range(cut): | |
595 path=getParentPath(path) | |
596 dom = self.getDomFromIndexMeta(path) | |
597 | |
598 docinfo['name']=getText(dom.find("name")) | |
599 logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) | |
600 return docinfo | |
601 | |
602 | |
603 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): | |
604 """parse texttool tag in index meta""" | |
605 logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) | |
606 if docinfo is None: | |
607 docinfo = {} | |
608 if docinfo.get('lang', None) is None: | |
609 docinfo['lang'] = '' # default keine Sprache gesetzt | |
610 if dom is None: | |
611 dom = self.getDomFromIndexMeta(url) | |
612 | |
613 texttool = self.metadata.getTexttoolData(dom=dom) | |
614 | |
615 archivePath = None | |
616 archiveName = None | |
617 | |
618 archiveName = getText(dom.find("name")) | |
619 if not archiveName: | |
620 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) | |
621 | |
622 archivePath = getText(dom.find("archive-path")) | |
623 if archivePath: | |
624 # clean up archive path | |
625 if archivePath[0] != '/': | |
626 archivePath = '/' + archivePath | |
627 if archiveName and (not archivePath.endswith(archiveName)): | |
628 archivePath += "/" + archiveName | |
629 else: | |
630 # try to get archive-path from url | |
631 logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) | |
632 if (not url.startswith('http')): | |
633 archivePath = url.replace('index.meta', '') | |
634 | |
635 if archivePath is None: | |
636 # we balk without archive-path | |
637 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) | |
638 | |
639 imageDir = texttool.get('image', None) | |
640 | |
641 if not imageDir: | |
642 # we balk with no image tag / not necessary anymore because textmode is now standard | |
643 #raise IOError("No text-tool info in %s"%(url)) | |
644 imageDir = "" | |
645 #xquery="//pb" | |
646 docinfo['imagePath'] = "" # keine Bilder | |
647 docinfo['imageURL'] = "" | |
648 | |
649 if imageDir and archivePath: | |
650 #print "image: ", imageDir, " archivepath: ", archivePath | |
651 imageDir = os.path.join(archivePath, imageDir) | |
652 imageDir = imageDir.replace("/mpiwg/online", '') | |
653 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) | |
654 docinfo['imagePath'] = imageDir | |
655 | |
656 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir | |
657 | |
658 viewerUrl = texttool.get('digiliburlprefix', None) | |
659 if viewerUrl: | |
660 docinfo['viewerURL'] = viewerUrl | |
661 | |
662 # old style text URL | |
663 textUrl = texttool.get('text', None) | |
664 if textUrl: | |
665 if urlparse.urlparse(textUrl)[0] == "": #keine url | |
666 textUrl = os.path.join(archivePath, textUrl) | |
667 # fix URLs starting with /mpiwg/online | |
668 if textUrl.startswith("/mpiwg/online"): | |
669 textUrl = textUrl.replace("/mpiwg/online", '', 1) | |
670 | |
671 docinfo['textURL'] = textUrl | |
672 | |
673 # new style text-url-path | |
674 textUrl = texttool.get('text-url-path', None) | |
675 if textUrl: | |
676 docinfo['textURLPath'] = textUrl | |
677 textUrlkurz = string.split(textUrl, ".")[0] | |
678 docinfo['textURLPathkurz'] = textUrlkurz | |
679 #if not docinfo['imagePath']: | |
680 # text-only, no page images | |
681 #docinfo = self.getNumTextPages(docinfo) | |
682 | |
683 # get bib info | |
684 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag | |
685 # TODO: is this needed here? | |
686 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) | |
687 | |
688 # TODO: what to do with presentation? | |
689 presentationUrl = texttool.get('presentation', None) | |
690 if presentationUrl: # ueberschreibe diese durch presentation informationen | |
691 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten | |
692 # durch den relativen Pfad auf die presentation infos | |
693 presentationPath = presentationUrl | |
694 if url.endswith("index.meta"): | |
695 presentationUrl = url.replace('index.meta', presentationPath) | |
696 else: | |
697 presentationUrl = url + "/" + presentationPath | |
698 | |
699 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) | |
700 | |
701 # get authorization | |
702 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info | |
703 | |
704 return docinfo | |
705 | |
706 | |
707 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): | |
708 """gets the bibliographical information from the preseantion entry in texttools | |
709 """ | |
710 dom=self.getPresentationInfoXML(url) | |
711 docinfo['author']=getText(dom.find(".//author")) | |
712 docinfo['title']=getText(dom.find(".//title")) | |
713 docinfo['year']=getText(dom.find(".//date")) | |
714 return docinfo | |
715 | |
716 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): | |
717 """path ist the path to the images it assumes that the index.meta file is one level higher.""" | |
718 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) | |
719 if docinfo is None: | |
720 docinfo = {} | |
721 path=path.replace("/mpiwg/online","") | |
722 docinfo['imagePath'] = path | |
723 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) | |
724 | |
725 pathorig=path | |
726 for x in range(cut): | |
727 path=getParentPath(path) | |
728 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) | |
729 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path | |
730 docinfo['imageURL'] = imageUrl | |
731 | |
732 #TODO: use getDocinfoFromIndexMeta | |
733 #path ist the path to the images it assumes that the index.meta file is one level higher. | |
734 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) | |
735 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) | |
736 return docinfo | |
737 | |
738 | |
739 def OLDgetDocinfo(self, mode, url): | |
740 """returns docinfo depending on mode""" | |
741 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) | |
742 # look for cached docinfo in session | |
743 if self.REQUEST.SESSION.has_key('docinfo'): | |
744 docinfo = self.REQUEST.SESSION['docinfo'] | |
745 # check if its still current | |
746 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: | |
747 logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%docinfo.keys()) | |
748 return docinfo | |
749 | |
750 # new docinfo | |
751 docinfo = {'mode': mode, 'url': url} | |
752 # add self url | |
753 docinfo['viewerUrl'] = self.getDocumentViewerURL() | |
754 if mode=="texttool": | |
755 # index.meta with texttool information | |
756 docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) | |
757 elif mode=="imagepath": | |
758 # folder with images, index.meta optional | |
759 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) | |
760 elif mode=="filepath": | |
761 # filename | |
762 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) | |
763 else: | |
764 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) | |
765 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) | |
766 | |
767 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) | |
768 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) | |
769 # store in session | |
770 self.REQUEST.SESSION['docinfo'] = docinfo | |
771 return docinfo | |
772 | 420 |
773 | 421 |
774 def getDocinfo(self, mode, url): | 422 def getDocinfo(self, mode, url): |
775 """returns docinfo depending on mode""" | 423 """returns docinfo depending on mode""" |
776 logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) | 424 logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) |
827 | 475 |
828 # bib info | 476 # bib info |
829 bib = self.metadataService.getBibData(dom=metaDom) | 477 bib = self.metadataService.getBibData(dom=metaDom) |
830 if bib: | 478 if bib: |
831 docinfo = self.getDocinfoFromBib(docinfo, bib) | 479 docinfo = self.getDocinfoFromBib(docinfo, bib) |
480 else: | |
481 # no bib - try info.xml | |
482 docinfo = self.getDocinfoFromPresentationInfoXml(docinfo) | |
832 | 483 |
833 # auth info | 484 # auth info |
834 access = self.metadataService.getAccessData(dom=metaDom) | 485 access = self.metadataService.getAccessData(dom=metaDom) |
835 if access: | 486 if access: |
836 docinfo = self.getDocinfoFromAccess(docinfo, access) | 487 docinfo = self.getDocinfoFromAccess(docinfo, access) |
837 | 488 |
838 # image path | 489 # image path |
839 if mode != 'texttool': | 490 if mode != 'texttool': |
840 # override image path from texttool | 491 # override image path from texttool |
841 docinfo['imagePath'] = url | 492 docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1) |
842 | 493 |
843 # number of images from digilib | 494 # number of images from digilib |
844 if docinfo.get('imagePath', None): | 495 if docinfo.get('imagePath', None): |
845 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] | 496 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] |
846 docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) | 497 docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) |
867 else: | 518 else: |
868 # use docUrl as docPath | 519 # use docUrl as docPath |
869 docUrl = docinfo['documentURL'] | 520 docUrl = docinfo['documentURL'] |
870 if not docUrl.startswith('http:'): | 521 if not docUrl.startswith('http:'): |
871 docPath = docUrl | 522 docPath = docUrl |
872 | 523 if docPath: |
524 # fix URLs starting with /mpiwg/online | |
525 docPath = docPath.replace('/mpiwg/online', '', 1) | |
526 | |
873 docinfo['documentPath'] = docPath | 527 docinfo['documentPath'] = docPath |
874 return docinfo | 528 return docinfo |
875 | 529 |
876 def getDocinfoFromTexttool(self, docinfo, texttool): | 530 def getDocinfoFromTexttool(self, docinfo, texttool): |
877 """reads contents of texttool element into docinfo""" | 531 """reads contents of texttool element into docinfo""" |
887 # old style text URL | 541 # old style text URL |
888 textUrl = texttool.get('text', None) | 542 textUrl = texttool.get('text', None) |
889 if textUrl and docPath: | 543 if textUrl and docPath: |
890 if urlparse.urlparse(textUrl)[0] == "": #keine url | 544 if urlparse.urlparse(textUrl)[0] == "": #keine url |
891 textUrl = os.path.join(docPath, textUrl) | 545 textUrl = os.path.join(docPath, textUrl) |
892 # fix URLs starting with /mpiwg/online | |
893 textUrl = textUrl.replace('/mpiwg/online', '', 1) | |
894 | 546 |
895 docinfo['textURL'] = textUrl | 547 docinfo['textURL'] = textUrl |
896 | 548 |
897 # new style text-url-path | 549 # new style text-url-path |
898 textUrl = texttool.get('text-url-path', None) | 550 textUrl = texttool.get('text-url-path', None) |
903 #docinfo['textURLPathkurz'] = textUrlkurz | 555 #docinfo['textURLPathkurz'] = textUrlkurz |
904 | 556 |
905 # old presentation stuff | 557 # old presentation stuff |
906 presentation = texttool.get('presentation', None) | 558 presentation = texttool.get('presentation', None) |
907 if presentation and docPath: | 559 if presentation and docPath: |
908 docinfo['presentationPath'] = os.path.join(docPath, presentation) | 560 if presentation.startswith('http:'): |
561 docinfo['presentationUrl'] = presentation | |
562 else: | |
563 docinfo['presentationUrl'] = os.path.join(docPath, presentation) | |
909 | 564 |
910 return docinfo | 565 return docinfo |
911 | 566 |
912 def getDocinfoFromBib(self, docinfo, bib): | 567 def getDocinfoFromBib(self, docinfo, bib): |
913 """reads contents of bib element into docinfo""" | 568 """reads contents of bib element into docinfo""" |
569 logging.debug("getDocinfoFromBib bib=%s"%repr(bib)) | |
914 # put all raw bib fields in dict "bib" | 570 # put all raw bib fields in dict "bib" |
915 docinfo['bib'] = bib | 571 docinfo['bib'] = bib |
916 bibtype = bib.get('@type', None) | 572 bibtype = bib.get('@type', None) |
917 docinfo['bibType'] = bibtype | 573 docinfo['bibType'] = bibtype |
918 # also store DC metadata for convenience | 574 # also store DC metadata for convenience |
923 return docinfo | 579 return docinfo |
924 | 580 |
925 def getDocinfoFromAccess(self, docinfo, acc): | 581 def getDocinfoFromAccess(self, docinfo, acc): |
926 """reads contents of access element into docinfo""" | 582 """reads contents of access element into docinfo""" |
927 #TODO: also read resource type | 583 #TODO: also read resource type |
584 logging.debug("getDocinfoFromAccess acc=%s"%repr(acc)) | |
928 try: | 585 try: |
929 acctype = accc['@attr']['type'] | 586 acctype = acc['@attr']['type'] |
930 if acctype: | 587 if acctype: |
931 access=acctype | 588 access=acctype |
932 if access in ['group', 'institution']: | 589 if access in ['group', 'institution']: |
933 access = acc['name'].lower() | 590 access = acc['name'].lower() |
934 | 591 |
957 | 614 |
958 # TODO: produce and keep list of image names and numbers | 615 # TODO: produce and keep list of image names and numbers |
959 return docinfo | 616 return docinfo |
960 | 617 |
961 | 618 |
619 def getDocinfoFromPresentationInfoXml(self,docinfo): | |
620 """gets DC-like bibliographical information from the presentation entry in texttools""" | |
621 url = docinfo.get('presentationUrl', None) | |
622 if not url: | |
623 logging.error("getDocinfoFromPresentation: no URL!") | |
624 return docinfo | |
625 | |
626 dom = None | |
627 metaUrl = None | |
628 if url.startswith("http://"): | |
629 # real URL | |
630 metaUrl = url | |
631 else: | |
632 # online path | |
633 | |
634 server=self.digilibBaseUrl+"/servlet/Texter?fn=" | |
635 metaUrl=server+url | |
636 | |
637 txt=getHttpData(metaUrl) | |
638 if txt is None: | |
639 logging.error("Unable to read info.xml from %s"%(url)) | |
640 return docinfo | |
641 | |
642 dom = ET.fromstring(txt) | |
643 docinfo['creator']=getText(dom.find(".//author")) | |
644 docinfo['title']=getText(dom.find(".//title")) | |
645 docinfo['date']=getText(dom.find(".//date")) | |
646 return docinfo | |
647 | |
648 | |
962 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): | 649 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
963 """returns pageinfo with the given parameters""" | 650 """returns pageinfo with the given parameters""" |
964 pageinfo = {} | 651 pageinfo = {} |
965 current = getInt(current) | 652 current = getInt(current) |
966 | 653 |