comparison documentViewer.py @ 631:0c3aab828864

remove index meta ns
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Thu, 02 Jul 2015 10:27:05 +0200
parents e36bf3226fde
children 618b600c805a
comparison
equal deleted inserted replaced
629:e36bf3226fde 631:0c3aab828864
16 16
17 from Products.MetaDataProvider import MetaDataFolder 17 from Products.MetaDataProvider import MetaDataFolder
18 18
19 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl 19 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl
20 20
21
INDEXMETA_NS = "http://md.mpiwg-berlin.mpg.de/ns/indexMeta#"


def removeINDEXMETA_NS(root):
    """Strip the index.meta namespace from all element tags below *root*.

    Every element whose tag contains the ``{INDEXMETA_NS}`` prefix is renamed
    in place to the part of the tag following that prefix. Tags in other
    namespaces, and tags without a namespace, are left untouched.

    TODO: everything should be changed so that it can deal with namespaces
    instead of removing them here.

    :param root: root element of the parsed index.meta DOM, or None.
    :returns: None; the tree is modified in place.
    """
    if root is None:
        # Callers may hand over a missing DOM and test for None only
        # afterwards; do nothing so that their own error handling applies.
        return

    prefix = '{%s}' % INDEXMETA_NS

    # Prefer iter(); getiterator() is deprecated and absent from newer
    # ElementTree releases. Fall back to it for older element classes.
    walk = getattr(root, 'iter', None) or root.getiterator

    for elem in walk():
        tag = elem.tag
        # Skip nodes whose tag is not string-like (presumably comments or
        # processing instructions, depending on the parser in use).
        if not hasattr(tag, 'find'):
            continue

        pos = tag.find(prefix)
        if pos >= 0:
            elem.tag = tag[pos + len(prefix):]
21 35
22 def getMDText(node): 36 def getMDText(node):
23 """returns the @text content from the MetaDataProvider metadata node""" 37 """returns the @text content from the MetaDataProvider metadata node"""
24 if isinstance(node, dict): 38 if isinstance(node, dict):
25 return node.get('@text', None) 39 return node.get('@text', None)
527 docUrl = None 541 docUrl = None
528 metaDom = None 542 metaDom = None
529 if mode=="texttool": 543 if mode=="texttool":
530 # url points to document dir or index.meta 544 # url points to document dir or index.meta
531 metaDom = self.metadataService.getDomFromPathOrUrl(url) 545 metaDom = self.metadataService.getDomFromPathOrUrl(url)
546 removeINDEXMETA_NS(metaDom)
547
532 if metaDom is None: 548 if metaDom is None:
533 raise IOError("Unable to find index.meta for mode=texttool!") 549 raise IOError("Unable to find index.meta for mode=texttool!")
534 550
535 docUrl = url.replace('/index.meta', '') 551 docUrl = url.replace('/index.meta', '')
536 if url.startswith('/mpiwg/online/'): 552 if url.startswith('/mpiwg/online/'):
537 docUrl = url.replace('/mpiwg/online/', '', 1) 553 docUrl = url.replace('/mpiwg/online/', '', 1)
554 elif mode=="textpath":
555 #url points to a text file
556 #index.meta optional
557 #assume index.meta in parent dir
558 docUrl = getParentPath(url)
559 docinfo['viewmode'] = "text"
560
561 try:
562 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
563
564 removeINDEXMETA_NS(metaDom)
565
566
567
568 except:
569 metaDom = None
570 #metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
571 #docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
572 docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1)
573 docinfo['textURL'] = url
574 if docinfo.get("creator", None) is None:
575 docinfo['creator'] = ""
576
577 if docinfo.get("title", None) is None:
578 docinfo['title'] = ""
579
580 if docinfo.get("documentPath", None) is None:
581 docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1)
582 docinfo['documentPath'] = url.replace('/pages', '', 1)
583
584 docinfo['numPages'] = 1
538 585
539 elif mode=="imagepath": 586 elif mode=="imagepath":
540 # url points to folder with images, index.meta optional 587 # url points to folder with images, index.meta optional
541 # asssume index.meta in parent dir 588 # asssume index.meta in parent dir
542 docUrl = getParentPath(url) 589
543 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 590 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)
591
544 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) 592 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1)
545 593
546 elif mode=="hocr": 594 elif mode=="hocr":
547 # url points to folder with images, index.meta optional 595 # url points to folder with images, index.meta optional
548 # asssume index.meta in parent dir 596 # asssume index.meta in parent dir
572 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 620 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
573 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 621 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
574 622
575 docinfo['documentUrl'] = docUrl 623 docinfo['documentUrl'] = docUrl
576 # process index.meta contents 624 # process index.meta contents
577 if metaDom is not None and metaDom.tag == 'resource': 625
626 if metaDom is not None and (metaDom.tag == 'resource' or metaDom.tag == "{%s}resource"%INDEXMETA_NS):
627 print("MD")
578 # document directory name and path 628 # document directory name and path
579 resource = self.metadataService.getResourceData(dom=metaDom, recursive=1) 629 resource = self.metadataService.getResourceData(dom=metaDom, recursive=1)
580 if resource: 630 if resource:
581 docinfo = self.getDocinfoFromResource(docinfo, resource) 631 docinfo = self.getDocinfoFromResource(docinfo, resource)
582 632
583 # texttool info 633 # texttool info
584 texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True) 634 texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True)
585 if texttool: 635 if texttool:
586 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) 636 docinfo = self.getDocinfoFromTexttool(docinfo, texttool)
587 # document info from full text server 637 # document info from full text server