Mercurial > hg > documentViewer
comparison documentViewer.py @ 631:0c3aab828864
remove index meta ns
author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 02 Jul 2015 10:27:05 +0200 |
parents | e36bf3226fde |
children | 618b600c805a |
comparison
equal
deleted
inserted
replaced
629:e36bf3226fde | 631:0c3aab828864 |
---|---|
16 | 16 |
17 from Products.MetaDataProvider import MetaDataFolder | 17 from Products.MetaDataProvider import MetaDataFolder |
18 | 18 |
19 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl | 19 from SrvTxtUtils import getInt, utf8ify, getText, getHttpData, refreshingImageFileIndexHtml, sslifyUrl |
20 | 20 |
21 | |
22 INDEXMETA_NS="http://md.mpiwg-berlin.mpg.de/ns/indexMeta#" | |
23 | |
24 def removeINDEXMETA_NS(root): #entfernt den namespace von indexmeta aus dem dom #TODO everything should be changed so that it can deal with NS | |
25 for elem in root.getiterator(): | |
26 print ("ETAG") | |
27 print(elem.tag) | |
28 if not hasattr(elem.tag, 'find'): continue # (1) | |
29 | |
30 i = elem.tag.find('{%s}'%INDEXMETA_NS) | |
31 if i >= 0: | |
32 elem.tag = elem.tag[i+len(('{%s}'%INDEXMETA_NS)):] | |
33 | |
34 print(elem.tag) | |
21 | 35 |
22 def getMDText(node): | 36 def getMDText(node): |
23 """returns the @text content from the MetaDataProvider metadata node""" | 37 """returns the @text content from the MetaDataProvider metadata node""" |
24 if isinstance(node, dict): | 38 if isinstance(node, dict): |
25 return node.get('@text', None) | 39 return node.get('@text', None) |
527 docUrl = None | 541 docUrl = None |
528 metaDom = None | 542 metaDom = None |
529 if mode=="texttool": | 543 if mode=="texttool": |
530 # url points to document dir or index.meta | 544 # url points to document dir or index.meta |
531 metaDom = self.metadataService.getDomFromPathOrUrl(url) | 545 metaDom = self.metadataService.getDomFromPathOrUrl(url) |
546 removeINDEXMETA_NS(metaDom) | |
547 | |
532 if metaDom is None: | 548 if metaDom is None: |
533 raise IOError("Unable to find index.meta for mode=texttool!") | 549 raise IOError("Unable to find index.meta for mode=texttool!") |
534 | 550 |
535 docUrl = url.replace('/index.meta', '') | 551 docUrl = url.replace('/index.meta', '') |
536 if url.startswith('/mpiwg/online/'): | 552 if url.startswith('/mpiwg/online/'): |
537 docUrl = url.replace('/mpiwg/online/', '', 1) | 553 docUrl = url.replace('/mpiwg/online/', '', 1) |
554 elif mode=="textpath": | |
555 #url points to a textfile | |
556 #index.meta optional | |
557 #assume index.meta in parent dir | |
558 docUrl = getParentPath(url) | |
559 docinfo['viewmode'] = "text" | |
560 | |
561 try: | |
562 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) | |
563 | |
564 removeINDEXMETA_NS(metaDom) | |
565 | |
566 | |
567 | |
568 except: | |
569 metaDom = None | |
570 #metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) | |
571 #docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) | |
572 docinfo['textURLPath'] = url.replace('/mpiwg/online', '', 1) | |
573 docinfo['textURL'] = url | |
574 if docinfo.get("creator", None) is None: | |
575 docinfo['creator'] = "" | |
576 | |
577 if docinfo.get("title", None) is None: | |
578 docinfo['title'] = "" | |
579 | |
580 if docinfo.get("documentPath", None) is None: | |
581 docinfo['documentPath'] = url.replace('/mpiwg/online', '', 1) | |
582 docinfo['documentPath'] = url.replace('/pages', '', 1) | |
583 | |
584 docinfo['numPages'] = 1 | |
538 | 585 |
539 elif mode=="imagepath": | 586 elif mode=="imagepath": |
540 # url points to folder with images, index.meta optional | 587 # url points to folder with images, index.meta optional |
541 # assume index.meta in parent dir | 588 # assume index.meta in parent dir |
542 docUrl = getParentPath(url) | 589 |
543 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) | 590 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) |
591 | |
544 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) | 592 docinfo['imagePath'] = url.replace('/mpiwg/online', '', 1) |
545 | 593 |
546 elif mode=="hocr": | 594 elif mode=="hocr": |
547 # url points to folder with images, index.meta optional | 595 # url points to folder with images, index.meta optional |
548 # assume index.meta in parent dir | 596 # assume index.meta in parent dir |
572 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) | 620 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) |
573 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) | 621 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) |
574 | 622 |
575 docinfo['documentUrl'] = docUrl | 623 docinfo['documentUrl'] = docUrl |
576 # process index.meta contents | 624 # process index.meta contents |
577 if metaDom is not None and metaDom.tag == 'resource': | 625 |
626 if metaDom is not None and (metaDom.tag == 'resource' or metaDom.tag == "{%s}resource"%INDEXMETA_NS): | |
627 print("MD") | |
578 # document directory name and path | 628 # document directory name and path |
579 resource = self.metadataService.getResourceData(dom=metaDom, recursive=1) | 629 resource = self.metadataService.getResourceData(dom=metaDom, recursive=1) |
580 if resource: | 630 if resource: |
581 docinfo = self.getDocinfoFromResource(docinfo, resource) | 631 docinfo = self.getDocinfoFromResource(docinfo, resource) |
582 | 632 |
583 # texttool info | 633 # texttool info |
584 texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True) | 634 texttool = self.metadataService.getTexttoolData(dom=metaDom, recursive=1, all=True) |
585 if texttool: | 635 if texttool: |
586 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) | 636 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) |
587 # document info from full text server | 637 # document info from full text server |