Mercurial > hg > ZopePubmanConnector
annotate zopePubmanConnector.py @ 11:69a2f0b8a932
sortieren von treffer listen nach typen
| author | dwinter |
|---|---|
| date | Sun, 05 May 2013 17:10:22 +0200 |
| parents | 007ba22a5eb1 |
| children | 55294833888f |
| rev | line source |
|---|---|
| 0 | 1 # -*- coding: utf-8 -*- |
| 2 | |
| 3 #Verbindet Zope mit pubman. | |
| 4 | |
| 5 | |
| 6 from OFS.SimpleItem import SimpleItem | |
| 7 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
| 8 import os.path | |
| 9 | |
| 10 from Globals import package_home | |
| 11 import httplib2 | |
| 12 import xml.etree.ElementTree as ET | |
| 1 | 13 import logging |
| 9 | 14 import time |
| 10 | 15 import unicodedata |
| 6 | 16 |
# Directory passed to every httplib2.Http(...) instance created in this module.
cacheFolder = "/var/tmp/.cacheWWW"
| 18 | |
def zptFile(self, path, orphaned=False):
    """Return the page template stored at *path* inside this product.

    path     -- template file name relative to the package directory
    orphaned -- when true the bare template is returned; otherwise it is
                wrapped with ``__of__(self)`` before being returned
    """
    template = PageTemplateFile(os.path.join(package_home(globals()), path))
    if not orphaned:
        # usual case: bind the template to the calling object
        template = template.__of__(self)
    return template
| 28 | |
class ZopePubmanConnector(SimpleItem):
    """Zope item that queries the PubMan search-and-export service.

    All requests go to ``connectorString``, ask for APA formatted snippets
    and are parsed with ElementTree.  Query values (person ids, context
    ids, publication types) are inserted into the URL as given; callers
    must pass values that are safe inside a URL.
    """

    # base URL of the search-and-export service used by every query
    connectorString = "http://pubman.mpiwg-berlin.mpg.de/search/SearchAndExport?"

    meta_type = "ZopePubmanConnector"

    manage_options = ({'label': 'Main Config', 'action': 'changeMain'},) + SimpleItem.manage_options

    # query-string tail shared by all requests
    _exportParams = "exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"

    # element paths used by more than one method
    _citationPath = ".//{http://purl.org/dc/terms/}bibliographicCitation"
    _itemPath = ".//{http://www.escidoc.de/schemas/item/0.8}item"

    def __init__(self, id, title, pubmanURL):
        self.id = id
        self.title = title
        # URL of a pubman instance, or of a collection if the default
        # collection should not be used.
        # NOTE(review): stored but not read by any method visible here;
        # all queries use connectorString.
        self.pubmanURL = pubmanURL

    def changeMain(self, pubmanURL=None, title=None, REQUEST=None, RESPONSE=None):
        """Change the main settings, or render the settings form.

        With a pubmanURL the settings are stored and, if RESPONSE is
        given, the browser is redirected to ``manage_main``.  Without a
        pubmanURL the change form is rendered and returned.
        """
        if pubmanURL:
            self.pubmanURL = pubmanURL
            # only overwrite the title when one was actually supplied
            if title is not None:
                self.title = title

            if RESPONSE is not None:
                RESPONSE.redirect('manage_main')
        else:
            pt = zptFile(self, 'zpt/ChangeZopePubmanConnector.zpt')
            return pt()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _buildUrl(self, cqlQuery, limit=None):
        """Return the request URL for an already URL-encoded CQL query."""
        url = self.connectorString + "cqlQuery=" + cqlQuery + "&" + self._exportParams
        if limit:
            url += "&maximumRecords=%s" % limit
        return url

    def _fetch(self, url):
        """Request *url* via httplib2 (using cacheFolder) and return the body."""
        resp, content = httplib2.Http(cacheFolder).request(url)
        return content

    def _parse(self, content):
        """Parse a response body and return the root element.

        Raises ET.ParseError when the body is not well-formed XML.
        """
        ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
        return ET.fromstring(content)

    def _textOf(self, element, default=""):
        """Return element.text, or *default* when the element is missing."""
        if element is None:
            return default
        return element.text

    def _toAscii(self, value):
        """Fold *value* to ASCII: NFKD-normalize, then drop non-ASCII characters.

        Byte strings are decoded as UTF-8 first.  The result is a native
        ``str`` so it can be concatenated into the query URL.
        """
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        folded = unicodedata.normalize('NFKD', value).encode('ASCII', 'ignore')
        # folded is pure ASCII, so this round trip cannot fail
        return str(folded.decode('ASCII'))

    # ------------------------------------------------------------------
    # queries
    # ------------------------------------------------------------------

    def getPublications(self, personID, limit=None, publicationType=None):
        """Return all publications of *personID*.

        personID        -- person identifier used in the creator query
        limit           -- optional maximum number of records
        publicationType -- optional publication type to restrict the result

        Returns a list of ``(objid, citation text)`` tuples.  The citation
        text is "" when an item carries no bibliographic citation.
        """
        query = "escidoc.publication.creator.person.identifier=%22" + personID + "%22"
        if publicationType is not None:
            # The type clause must be part of the cqlQuery value; a "&"
            # in front of it would end the parameter and drop the filter.
            query += "%20and%20escidoc.publication.type=%22" + publicationType + "%22"

        url = self._buildUrl(query, limit)
        logging.debug(url)
        root = self._parse(self._fetch(url))

        return [(item.get('objid'), self._textOf(item.find(self._citationPath)))
                for item in root.findall(self._itemPath)]

    def search(self, values=None, exact=False, limit=None, contexts=None):
        """Search pubman.

        values   -- mapping field -> value; known fields are "title",
                    "author" and "any", unknown fields are skipped
        exact    -- when false a "*" wildcard is appended to every value
        limit    -- optional maximum number of records
        contexts -- optional context id, or list of ids, to restrict to

        Returns a map escidocId -> citation text.  An empty map is
        returned when the response cannot be parsed.
        """
        if values is None:
            values = {}

        fieldToEscidoc = {"title": "escidoc.any-title",
                          "author": "escidoc.publication.any.publication-creator-names",
                          "any": "escidoc.metadata"}

        querys = []
        for field in values.keys():
            searchField = fieldToEscidoc.get(field)
            if searchField is None:
                logging.debug("search, don't know field: %s" % field)
                continue

            value = self._toAscii(values[field])
            if value == '':
                continue
            logging.debug("%s=%s" % (field, value))
            if not exact:
                value = value + "*"

            querys.append("%s=%%22%s%%22" % (searchField, value))

        query = "%20AND%20".join(querys)

        if contexts:  # restrict to the given contexts
            # a single id may arrive as a byte or unicode string
            if isinstance(contexts, (str, type(u""))):
                contexts = [contexts]

            ctxquery = "%20OR%20".join(
                ["escidoc.context.objid=%%22%s%%22" % context for context in contexts])

            if query != "":
                query = query + "%20AND%20(" + ctxquery + ")"
            else:
                query = "(%s)" % ctxquery

        url = self._buildUrl(query, limit)
        logging.debug(url)
        content = self._fetch(url)

        try:
            root = self._parse(content)
        except ET.ParseError:
            logging.error("Couldn't parse content of:%s" % url)
            return {}

        ret = {}
        for item in root.findall(self._itemPath):
            ret[item.get('objid')] = self._textOf(item.find(self._citationPath))
        return ret

    def getEntriesFromPubman(self, escidocids):
        # Groups the entries by publication type.  *escidocids* is a
        # sequence of objects with an ``escidocid`` attribute; the result
        # maps type -> list of (escidocid, citation text).
        # NOTE(review): kept without a docstring as in the original;
        # Zope 2's publisher is understood to expose only methods that
        # have one -- confirm before adding a docstring.
        doctypes = {}
        for entry in escidocids:
            txt, doctype = self.getEntryFromPubman(entry.escidocid, True)
            doctypes.setdefault(doctype, []).append((entry.escidocid, txt))
        return doctypes

    def getEntryFromPubman(self, escidocid, extendedData=None):
        """Get one entry.

        escidocid    -- object id; surrounding whitespace is ignored
        extendedData -- when not None the publication type is returned too

        Returns the citation text, or ``(citation text, publication type)``
        when extendedData is given.  Missing values are returned as "".
        """
        escidocid = escidocid.strip()
        url = self._buildUrl("escidoc.objid=%s" % escidocid)
        logging.debug(url)
        root = self._parse(self._fetch(url))

        citation = root.find(self._citationPath)

        if extendedData is None:
            # always a plain string here, also when nothing was found
            return self._textOf(citation)

        if citation is None:
            return "", ''

        ns = {'escidocMetadataProfile': "http://escidoc.mpg.de/metadataprofile/schema/0.1/",
              'escidocMetadataRecords': "http://www.escidoc.de/schemas/metadatarecords/0.4"
              }
        path = ".//escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/escidocMetadataProfile:publication"

        publicationTag = root.find(path, ns)
        if publicationTag is None:
            return citation.text, ''
        return citation.text, publicationTag.get('type')

    def pubmanConnectorURL(self):
        # Returns the base URL used for all queries.
        # NOTE(review): kept without a docstring as in the original (see
        # the note on getEntriesFromPubman).
        return self.connectorString

    def getPublicationsFromContext(self, context, limit=None, publicationType=None, search=None):
        """Return all publications of *context* as a list of maps.

        context         -- context object id
        limit           -- optional maximum number of records
        publicationType -- optional publication type to restrict the result
        search          -- optional term matched against escidoc.metadata

        Every map has the keys:
            "id"        --> escidoc object id
            "citation"  --> citation in APA formatting
            "volume"    --> volume
            "link"      --> download link (external-url component), or ""
            "abstracts" --> map language -> abstract text
            "authors"   --> [(FAMILY NAME, GIVEN NAME), ...]
            "title"     --> title
            "year"      --> issued
        Values that are missing in the record are returned as "".
        """
        query = "(escidoc.context.objid=%22" + context + "%22"
        if publicationType is not None:
            query += "%20and%20escidoc.publication.type=%22" + publicationType + "%22"
        if search is not None and search != "":
            query += "%20and%20escidoc.metadata=" + self._toAscii(search)
        query += ")"

        url = self._buildUrl(query, limit)

        startTime = time.time()
        logging.debug("getPublicationsFromContext: getting %s" % url)
        content = self._fetch(url)
        logging.debug("getPublicationsFromContext: got data in %ss" % (time.time() - startTime))

        root = self._parse(content)

        abstractpath = ".//{http://purl.org/dc/terms/}abstract"
        issuedpath = ".//{http://purl.org/dc/terms/}issued"

        creatorpath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/publication}creator/{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}person"
        familyNamepath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}family-name"
        givenNamepath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}given-name"

        titlepath = ".//{http://purl.org/dc/elements/1.1/}title"

        srcpath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/publication}source"
        volumepath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}volume"

        # only components stored as external URL are used for the link
        linkspath = """.//{http://www.escidoc.de/schemas/components/0.8}component/{http://www.escidoc.de/schemas/components/0.8}content[@storage="external-url"]"""

        ret = []
        for citation in root.findall(self._itemPath):
            # the volume is read from the first publication source element
            src = citation.find(srcpath)
            vol = src.find(volumepath) if src is not None else None

            # link to the full text
            linkTag = citation.find(linkspath)
            if linkTag is not None:
                link = linkTag.get("{http://www.w3.org/1999/xlink}href")
            else:
                link = ""

            # one abstract per language
            abstractTexts = {}
            for abstract in citation.findall(abstractpath):
                lang = abstract.get("{http://www.w3.org/XML/1998/namespace}lang")
                abstractTexts[lang] = abstract.text

            authors = [(self._textOf(author.find(familyNamepath)),
                        self._textOf(author.find(givenNamepath)))
                       for author in citation.findall(creatorpath)]

            item = {"id": citation.get('objid'),
                    "citation": self._textOf(citation.find(self._citationPath)),
                    "volume": self._textOf(vol),
                    "link": link,
                    "abstracts": abstractTexts,
                    "authors": authors,
                    "title": self._textOf(citation.find(titlepath)),
                    "year": self._textOf(citation.find(issuedpath))}

            ret.append(item)

        logging.debug("getPublicationsFromContext: done in %ss" % (time.time() - startTime))
        return ret
| 411 | |
| 412 | |
| 413 | |
| 0 | 414 |
def manage_addZopePubmanConnectorForm(self):
    """Render the form for adding a ZopePubmanConnector."""
    addForm = zptFile(self, 'zpt/AddZopePubmanConnector.zpt')
    return addForm()
| 419 | |
| 420 | |
def manage_addZopePubmanConnector(self, id, title, pubmanURL, RESPONSE=None):
    """Create a ZopePubmanConnector and store it in this container.

    id, title, pubmanURL -- passed on to the ZopePubmanConnector constructor
    RESPONSE             -- when given, redirected to ``manage_main``
    """
    self._setObject(id, ZopePubmanConnector(id, title, pubmanURL))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
| 430 |
