Mercurial > hg > ZopePubmanConnector
annotate zopePubmanConnector.py @ 12:55294833888f
fix error without network connection.
author | casties |
---|---|
date | Thu, 23 May 2013 14:16:47 +0200 |
parents | 69a2f0b8a932 |
children | 43849c9cc08b |
rev | line source |
---|---|
0 | 1 # -*- coding: utf-8 -*- |
2 | |
# Connects Zope with PubMan.
4 | |
5 | |
6 from OFS.SimpleItem import SimpleItem | |
7 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
8 import os.path | |
9 | |
10 from Globals import package_home | |
11 import httplib2 | |
12 import xml.etree.ElementTree as ET | |
1 | 13 import logging |
9 | 14 import time |
10 | 15 import unicodedata |
6 | 16 |
# Directory handed to every httplib2.Http() instance in this module as its cache location.
cacheFolder = "/var/tmp/.cacheWWW"
18 | |
def zptFile(self, path, orphaned=False):
    """Return a page template file that ships with this product.

    @path path of the template, relative to the product's package home
    @orphaned if true, the bare template is returned; otherwise it is bound
              to *self* through ``__of__``
    """
    template = PageTemplateFile(os.path.join(package_home(globals()), path))
    if orphaned:
        # unusual case: hand the template back without binding it to self
        return template
    return template.__of__(self)
28 | |
# Connector between Zope and a PubMan (eSciDoc) search-and-export service.
# Every request goes to ``connectorString``; the XML that comes back is parsed
# with ElementTree.
# NOTE(review): ``pubmanURL`` is stored but never read inside this class --
# confirm whether requests were meant to use it instead of ``connectorString``.
# Methods that originally had no docstring keep ``#`` comments only, because
# Zope's publisher only publishes objects that carry a docstring.
class ZopePubmanConnector(SimpleItem):

    connectorString = "http://pubman.mpiwg-berlin.mpg.de/search/SearchAndExport?"

    meta_type = "ZopePubmanConnector"

    manage_options = ({'label': 'Main Config', 'action': 'changeMain'},) + SimpleItem.manage_options

    # export and sort parameters that are appended to every request
    _exportParams = "exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"

    # element paths used by more than one method
    _citationPath = ".//{http://purl.org/dc/terms/}bibliographicCitation"
    _itemPath = ".//{http://www.escidoc.de/schemas/item/0.8}item"

    # Store id, title and pubmanURL on the new instance.
    # pubmanURL: URL of a PubMan instance or of a collection, in case the
    # default collection should not be used.
    def __init__(self, id, title, pubmanURL):
        self.id = id
        self.title = title
        self.pubmanURL = pubmanURL

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _buildUrl(self, cql, limit=None):
        """Build the request URL for the already URL-escaped CQL query *cql*."""
        url = self.connectorString + "cqlQuery=" + cql + "&" + self._exportParams
        if limit:
            url += "&maximumRecords=%s" % limit
        return url

    def _fetch(self, url):
        """Request *url* through httplib2 (with ``cacheFolder`` as cache) and return the body."""
        resp, content = httplib2.Http(cacheFolder).request(url)
        return content

    def _parseXml(self, content):
        """Parse *content* and return the root element."""
        ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
        return ET.fromstring(content)

    def _findText(self, element, path, default=""):
        """Return the text of the first sub-element matching *path*.

        *default* is returned when *element* is None or nothing matches.
        """
        if element is None:
            return default
        found = element.find(path)
        if found is None:
            return default
        return found.text

    def _toAscii(self, value):
        """Reduce *value* to plain ASCII (NFKD-normalised, everything else dropped)."""
        try:
            normalized = unicodedata.normalize('NFKD', value)
        except TypeError:
            # not a unicode string: treat it as UTF-8 encoded bytes
            normalized = unicodedata.normalize('NFKD', value.decode('utf-8'))
        asciiValue = normalized.encode('ASCII', 'ignore')
        if not isinstance(asciiValue, str):
            # only on Python 3: go back from bytes to the native string type
            asciiValue = asciiValue.decode('ASCII')
        return asciiValue

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------

    def changeMain(self, pubmanURL=None, title=None, REQUEST=None, RESPONSE=None):
        """Change the main settings, or show the settings form.

        Without *pubmanURL* the form zpt/ChangeZopePubmanConnector.zpt is
        rendered and returned. With *pubmanURL* the new values are stored and,
        if *RESPONSE* is given, the browser is redirected to manage_main.

        @pubmanURL new PubMan URL
        @title new title; the stored title is kept when this is None
        """
        if not pubmanURL:
            return zptFile(self, 'zpt/ChangeZopePubmanConnector.zpt')()

        self.pubmanURL = pubmanURL
        if title is not None:
            # do not wipe the stored title when only the URL is passed
            self.title = title

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def getPublications(self, personID, limit=None, publicationType=None):
        """Get all publications of the person *personID*.

        @personID value matched against escidoc.publication.creator.person.identifier
        @limit optional maximum number of records
        @publicationType optional value matched against escidoc.publication.type
        @return list of (escidocId, citation) tuples; an empty list if the
                request or the parsing fails
        """
        cql = "escidoc.publication.creator.person.identifier=%22" + personID + "%22"
        if publicationType is not None:
            # The type filter must stay inside the cqlQuery parameter, so there
            # must be no "&" in front of it.
            cql += "%20and%20escidoc.publication.type=%22" + publicationType + "%22"

        cn = self._buildUrl(cql, limit)
        logging.debug(cn)

        try:
            root = self._parseXml(self._fetch(cn))
        except Exception as e:
            logging.error("getPublications: unable to read and parse data! %s" % e)
            return []

        # one <escidocItem:item objid="escidoc:630782" ...> per publication
        return [(item.get('objid'), self._findText(item, self._citationPath))
                for item in root.findall(self._itemPath)]

    def search(self, values=None, exact=False, limit=None, contexts=None):
        """Search PubMan.

        @values map field -> value; known fields are "title", "author" and
                "any", entries for other fields are skipped
        @exact if false, "*" is appended to every value
        @limit optional maximum number of records
        @contexts one context id or a list of context ids the search is
                  restricted to
        @return map escidocId -> citation snippet; an empty map if the request
                or the parsing fails
        """
        fieldToEscidoc = {"title": "escidoc.any-title",
                          "author": "escidoc.publication.any.publication-creator-names",
                          "any": "escidoc.metadata"}

        if values is None:
            values = {}

        # NOTE(review): values are put into the URL without URL-quoting;
        # confirm whether callers pass already escaped text.
        querys = []
        for field in values.keys():
            searchField = fieldToEscidoc.get(field)
            if searchField is None:
                logging.debug("search, don't know field: %s" % field)
                continue

            value = self._toAscii(values[field])
            if not value:
                continue
            logging.debug("%s=%s" % (field, value))
            if not exact:
                value = value + "*"

            querys.append("%s=%%22%s%%22" % (searchField, value))

        query = "%20AND%20".join(querys)

        if contexts:  # restrict the search to the given contexts
            if isinstance(contexts, (str, type(u""))):
                # a single id (byte or unicode string) must not be iterated
                # character by character
                contexts = [contexts]

            ctxquery = "%20OR%20".join(
                ["escidoc.context.objid=%%22%s%%22" % context for context in contexts])

            if query != "":
                # "%20" in front of AND keeps it separated from the last term
                query = query + "%20AND%20(" + ctxquery + ")"
            else:
                query = "(%s)" % ctxquery

        cn = self._buildUrl(query, limit)

        try:
            logging.debug("search: " + cn)
            content = self._fetch(cn)
        except Exception:
            logging.error("Unable to get data from PubMan!")
            return {}

        try:
            root = self._parseXml(content)
        except Exception:
            logging.error("Couldn't parse content of:%s" % cn)
            return {}

        # one <escidocItem:item objid="escidoc:630782" ...> per hit
        ret = {}
        for item in root.findall(self._itemPath):
            ret[item.get('objid')] = self._findText(item, self._citationPath)
        return ret

    # Fetch every entry of escidocids (objects with an ``escidocid``
    # attribute) and group them by publication type.
    # Returns a map: publication type -> list of (escidocId, citation).
    def getEntriesFromPubman(self, escidocids):
        doctypes = {}
        for entry in escidocids:
            txt, doctype = self.getEntryFromPubman(entry.escidocid, True)
            doctypes.setdefault(doctype, []).append((entry.escidocid, txt))
        return doctypes

    def getEntryFromPubman(self, escidocid, extendedData=None):
        """Get one entry.

        @escidocid id of the item; surrounding whitespace is ignored
        @extendedData if not None, the publication type is returned as well
        @return the citation text, or the tuple (citation text, publication
                type) when *extendedData* is not None. If nothing is found or
                the request fails the result is "" respectively ("", "").
        """
        extended = extendedData is not None
        if extended:
            notFound = ("", '')
        else:
            notFound = ""

        escidocid = escidocid.strip()
        cn = self._buildUrl("escidoc.objid=%s" % escidocid)
        logging.debug(cn)

        try:
            root = self._parseXml(self._fetch(cn))
        except Exception as e:
            logging.error("getEntryFromPubman: unable to read and parse data! %s" % e)
            return notFound

        citation = root.find(self._citationPath)
        if citation is None:
            return notFound

        if not extended:
            return citation.text

        ns = {'escidocMetadataProfile': "http://escidoc.mpg.de/metadataprofile/schema/0.1/",
              'escidocMetadataRecords': "http://www.escidoc.de/schemas/metadatarecords/0.4"}
        path = ".//escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/escidocMetadataProfile:publication"

        publicationTag = root.find(path, ns)
        if publicationTag is None:
            return citation.text, ''
        return citation.text, publicationTag.get('type')

    # Return the URL of the search-and-export service that is queried.
    def pubmanConnectorURL(self):
        return self.connectorString

    def getPublicationsFromContext(self, context, limit=None, publicationType=None, search=None):
        """Return all publications of a context, one map per publication.

        Every map has these keys:
        "id" --> escidoc id of the item
        "citation" --> citation in APA formatting
        "volume" --> volume
        "link" --> download link
        "abstracts" --> map language (e.g. deu/eng) -> abstract
        "authors" --> [(FAMILY NAME, GIVEN NAME), ...]
        "title" --> title
        "year" --> issued

        Values that are missing in the data are "" (empty map/list for
        abstracts/authors).

        @context id of the context
        @limit optional maximum number of records
        @publicationType optional value matched against escidoc.publication.type
        @search optional term matched against escidoc.metadata
        @return list of maps; an empty list if the request or the parsing fails
        """
        abstractpath = ".//{http://purl.org/dc/terms/}abstract"
        issuedpath = ".//{http://purl.org/dc/terms/}issued"
        creatorpath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/publication}creator/{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}person"
        familyNamepath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}family-name"
        givenNamepath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}given-name"
        titlepath = ".//{http://purl.org/dc/elements/1.1/}title"
        srcpath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/publication}source"
        volumepath = ".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}volume"
        # only components that are stored as external URL are used as link
        linkspath = """.//{http://www.escidoc.de/schemas/components/0.8}component/{http://www.escidoc.de/schemas/components/0.8}content[@storage="external-url"]"""

        cql = "(escidoc.context.objid=%22" + context + "%22"
        if publicationType is not None:
            cql += "%20and%20escidoc.publication.type=%22" + publicationType + "%22"
        if search is not None and search != "":
            # NOTE(review): the term is appended without quotes or URL-quoting
            cql += "%20and%20escidoc.metadata=" + self._toAscii(search)
        cql += ")"

        cn = self._buildUrl(cql, limit)

        startTime = time.time()
        try:
            logging.debug("getPublicationsFromContext: getting %s" % cn)
            content = self._fetch(cn)
            logging.debug("getPublicationsFromContext: got data in %ss" % (time.time() - startTime))
            root = self._parseXml(content)
        except Exception as e:
            logging.error("Unable to read and parse data! %s" % e)
            return []

        ret = []
        # one <escidocItem:item objid="escidoc:630782" ...> per publication
        for publication in root.findall(self._itemPath):
            # volume = preprint id, e.g.
            # <publication:source type="series">
            #   <dc:title>Max-Planck-Institut für Wissenschaftsgeschichte : Preprint</dc:title>
            #   <escidoc:volume>437</escidoc:volume>
            volume = self._findText(publication.find(srcpath), volumepath)

            # link to the fulltext, taken from the xlink:href of
            # <escidocComponents:content storage="external-url" xlink:href="..."/>
            linkTag = publication.find(linkspath)
            if linkTag is not None:
                link = linkTag.get("{http://www.w3.org/1999/xlink}href")
            else:
                link = ""

            # <dcterms:abstract xml:lang="deu">...</dcterms:abstract>
            abstractTexts = {}
            for abstract in publication.findall(abstractpath):
                lang = abstract.get("{http://www.w3.org/XML/1998/namespace}lang")
                abstractTexts[lang] = abstract.text

            authors = [(self._findText(author, familyNamepath),
                        self._findText(author, givenNamepath))
                       for author in publication.findall(creatorpath)]

            ret.append({"id": publication.get('objid'),
                        "citation": self._findText(publication, self._citationPath),
                        "volume": volume,
                        "link": link,
                        "abstracts": abstractTexts,
                        "authors": authors,
                        "title": self._findText(publication, titlepath),
                        "year": self._findText(publication, issuedpath)})

        logging.debug("getPublicationsFromContext: done in %ss" % (time.time() - startTime))
        return ret
407 | |
408 | |
409 | |
0 | 410 |
def manage_addZopePubmanConnectorForm(self):
    """Render the form zpt/AddZopePubmanConnector.zpt and return the result."""
    return zptFile(self, 'zpt/AddZopePubmanConnector.zpt')()
415 | |
416 | |
def manage_addZopePubmanConnector(self, id, title, pubmanURL, RESPONSE=None):
    """Create a ZopePubmanConnector and store it in *self* under *id*.

    If *RESPONSE* is given, the browser is redirected to manage_main
    afterwards.
    """
    self._setObject(id, ZopePubmanConnector(id, title, pubmanURL))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
426 |