Mercurial > hg > ZopePubmanConnector
changeset 14:d92decb037d6
bugs in publ
author | dwinter |
---|---|
date | Fri, 31 May 2013 11:40:00 +0200 |
parents | 43849c9cc08b |
children | ca3084877394 |
files | zopePubmanConnector.py |
diffstat | 1 files changed, 133 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/zopePubmanConnector.py Tue May 28 09:46:58 2013 +0200 +++ b/zopePubmanConnector.py Fri May 31 11:40:00 2013 +0200 @@ -16,6 +16,15 @@ cacheFolder ="/var/tmp/.cacheWWW" +ns = {'escidocMetadataProfile':"http://escidoc.mpg.de/metadataprofile/schema/0.1/", + 'escidocMetadataRecords':"http://www.escidoc.de/schemas/metadatarecords/0.4", + 'dc':'http://purl.org/dc/elements/1.1/', + 'escidocComponents':'http://www.escidoc.de/schemas/components/0.8', + 'escidocItem':'http://www.escidoc.de/schemas/item/0.8' + } + + + def zptFile(self, path, orphaned=False): """returns a page template file from the product""" if orphaned: @@ -90,15 +99,70 @@ citationxpath=".//{http://purl.org/dc/terms/}bibliographicCitation" objxpath=".//{http://www.escidoc.de/schemas/item/0.8}item" + + + citations=root.findall(objxpath) - + logging.debug(len(citations)) ret=[] for citation in citations: objId = citation.get('objid') - + text = citation.find(citationxpath) - ret.append((objId,text.text)) + + + + + idTermPath =""".//escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/escidocMetadataProfile:publication/dc:identifier""" + #idTermPath =".//{http://purl.org/dc/elements/1.1/}identifier" + + idterms = citation.findall(idTermPath,ns) + + linksIdentifier=[] + linksLocator=[] + + + bookID = None + + + for idterm in idterms: + if idterm.get("{http://www.w3.org/2001/XMLSchema-instance}type",'') in ['eterms:OTHER','eidt:OTHER']: ##suche nach bookID + logging.debug("zopePubmanConnector: %s"%idterm.text) + checkID =idterm.text.lstrip().rstrip() + if checkID.startswith("MPIWG-Book:"): + bookID = checkID + break + elif idterm.get("{http://www.w3.org/2001/XMLSchema-instance}type",'') in ['eterms:URI','eidt:URI']: + linksIdentifier.append(idterm.text.lstrip().rstrip()) + + + + + componentsPath =""".//escidocComponents:components[1]""" + + components=citation.findall(componentsPath,ns); + + for component in components: + cnt = component.find(".//escidocComponents:content",ns) + if cnt is not None: + link="" + title="" + type="" + for name,value in cnt.items(): + if name.endswith("href"): + link=value + elif name.endswith("title"): + title=value + elif name.endswith("storage"): + type=value + + linksLocator.append((title,link,type)) + + + + + ret.append((objId,text.text,bookID,linksIdentifier,linksLocator)) @@ -211,6 +275,8 @@ def getEntryFromPubman(self,escidocid,extendedData=None): """get one entry""" + + escidocid=escidocid.lstrip().strip() h = httplib2.Http(cacheFolder) cn = self.connectorString+"cqlQuery=escidoc.objid=%s&" @@ -225,18 +291,73 @@ citationxpath=".//{http://purl.org/dc/terms/}bibliographicCitation" - citation=root.find(citationxpath) + itempath = ".//escidocItem:item" + + item = root.find(itempath,ns) #get item + + citation=item.find(citationxpath,ns) + + + if citation is not None and extendedData is not None: + + linksIdentifier=[] + linksLocator=[] + - if citation is not None and extendedData is not None: - ns = {'escidocMetadataProfile':"http://escidoc.mpg.de/metadataprofile/schema/0.1/", - 'escidocMetadataRecords':"http://www.escidoc.de/schemas/metadatarecords/0.4" - } - + #get identifier + idTermPath =""".//escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/escidocMetadataProfile:publication/dc:identifier""" + #idTermPath =".//{http://purl.org/dc/elements/1.1/}identifier" + + idterms = item.findall(idTermPath,ns) + + bookID = None + logging.debug("zopePubmanConnector: %s"%idterms) + for idterm in idterms: + + if idterm.get("{http://www.w3.org/2001/XMLSchema-instance}type",'') in ['eterms:OTHER','eidt:OTHER']: ##suche nach bookID + logging.debug("zopePubmanConnector: %s"%idterm.text) + checkID =idterm.text.lstrip().rstrip() + if checkID.startswith("MPIWG-Book:"): + bookID = checkID + break + elif idterm.get("{http://www.w3.org/2001/XMLSchema-instance}type",'') in ['eterms:URI','eidt:URI']: + linksIdentifier.append(idterm.text.lstrip().rstrip()) + + + #get files and locators + componentsPath =""".//escidocComponents:components[1]""" + + components=item.findall(componentsPath,ns); + + for component in components: + cnt = component.find(".//escidocComponents:content",ns) + if cnt is not None: + link="" + title="" + type="" + for name,value in cnt.items(): + if name.endswith("href"): + link=value + elif name.endswith("title"): + title=value + elif name.endswith("storage"): + type=value + + linksLocator.append((title,link,type)) + + + + + + + + + path = ".//escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/escidocMetadataProfile:publication" - - publicationTag= root.find(path,ns); - return citation.text,publicationTag.get('type') + publicationTag= item.find(path,ns); + + return citation.text,publicationTag.get('type'),bookID,linksIdentifier,linksLocator if citation is not None: