0
|
1 # -*- coding: utf-8 -*-
|
|
2
|
|
3 #Verbindet Zope mit pubman.
|
|
4
|
|
5
|
|
6 from OFS.SimpleItem import SimpleItem
|
|
7 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
|
|
8 import os.path
|
|
9
|
|
10 from Globals import package_home
|
|
11 import httplib2
|
|
12 import urlparse
|
|
13 import urllib
|
|
14 import re
|
|
15 import xml.etree.ElementTree as ET
|
|
16 import json
|
1
|
17 import logging
|
0
|
18
|
6
|
19
|
|
# Directory handed to httplib2.Http() as its cache location for PubMan requests.
cacheFolder = "/var/tmp/.cacheWWW"
|
|
21
|
0
|
def zptFile(self, path, orphaned=False):
    """Return a page template file loaded from this product's directory.

    ``path`` is joined onto the product's package home. Unless ``orphaned``
    is true (the unusual case), the template is wrapped with ``__of__(self)``
    before it is returned.
    """
    template = PageTemplateFile(os.path.join(package_home(globals()), path))
    if not orphaned:
        template = template.__of__(self)
    return template
|
|
31
|
|
class ZopePubmanConnector(SimpleItem):
    # Zope item that queries a PubMan (eSciDoc) search-and-export service.
    #
    # Every query is an HTTP GET against ``connectorString`` asking for
    # APA-formatted snippets; the XML answer is parsed with ElementTree.
    #
    # NOTE(review): the class is documented with comments rather than a
    # docstring on purpose -- the original had none, and Zope's publisher
    # uses docstrings as a publishability marker. Confirm before adding one.

    # Base URL of the search/export interface; query parameters are appended.
    connectorString="http://pubman.mpiwg-berlin.mpg.de/search/SearchAndExport?"

    meta_type="ZopePubmanConnector"

    manage_options= ({'label':'Main Config','action': 'changeMain'},) + SimpleItem.manage_options

    # Export/sort parameters shared by every request.
    _exportParams="exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"

    # ElementTree paths used by several methods.
    # An item looks like: <escidocItem:item objid="escidoc:630782" ...>
    _itemXPath=".//{http://www.escidoc.de/schemas/item/0.8}item"
    _citationXPath=".//{http://purl.org/dc/terms/}bibliographicCitation"

    def __init__(self,id,title,pubmanURL):
        self.id=id
        self.title=title
        # URL of a pubman instance, or of a collection if the default
        # collection should not be used.
        self.pubmanURL=pubmanURL

    def changeMain(self,pubmanURL=None,title=None,REQUEST=None,RESPONSE=None):
        """Change the main settings, or render the configuration form.

        Without ``pubmanURL`` the change form template is rendered and
        returned. With ``pubmanURL`` the URL is stored, the title is stored
        when one was supplied, and ``RESPONSE`` (if given) is redirected to
        ``manage_main``.
        """
        if not pubmanURL:
            pt=zptFile(self, 'zpt/ChangeZopePubmanConnector.zpt')
            return pt()

        self.pubmanURL=pubmanURL
        # Do not wipe an existing title when the caller only passes a URL.
        if title is not None:
            self.title=title

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def _buildURL(self,cqlQuery,limit=None):
        # Assemble the request URL for an already URL-encoded CQL query.
        url = self.connectorString+"cqlQuery="+cqlQuery+"&"+self._exportParams
        if limit:
            url+="&maximumRecords=%s"%limit
        return url

    def _fetchRoot(self,url):
        # Request ``url`` through the caching httplib2 client and return the
        # parsed XML root element. Parse errors propagate as ET.ParseError.
        logging.debug(url)
        h = httplib2.Http(cacheFolder)
        resp, content = h.request(url)
        ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
        return ET.fromstring(content)

    @staticmethod
    def _textOf(element,default=""):
        # Return element.text, or ``default`` when the element is missing.
        if element is None:
            return default
        return element.text

    def getPublications(self,personID,limit=None,publicationType=None):
        """Get all publications of the person ``personID``.

        The query matches ``escidoc.any-identifier`` against ``personID`` and,
        when ``publicationType`` is given, additionally restricts
        ``escidoc.publication.type``. ``limit`` is sent as ``maximumRecords``.

        Returns a list of ``(objid, citation text)`` tuples; the citation is
        ``""`` for an item without a bibliographicCitation element.
        """
        cql = "escidoc.any-identifier=%22"+personID+"%22"
        if publicationType is not None:
            cql +="%20and%20escidoc.publication.type=%22"+publicationType+"%22"

        root = self._fetchRoot(self._buildURL(cql,limit))

        return [(item.get('objid'), self._textOf(item.find(self._citationXPath)))
                for item in root.findall(self._itemXPath)]

    def search(self,values=None,exact=False,limit=None,contexts=None):
        """Search pubman.

        @values mapping of field -> value; known fields are "title",
                "author" and "any", unknown fields are logged and ignored
        @exact  if false, "*" is appended to every value
        @limit  sent as maximumRecords when set
        @contexts one context objid or a sequence of them; restricts the
                search to these contexts
        @return mapping of escidocId -> XML-formatted snippets; empty if the
                response could not be parsed
        """
        fieldToEscidoc={"title":"escidoc.any-title",
                        "author":"escidoc.publication.any.publication-creator-names",
                        "any":"escidoc.any-metadata"}

        if values is None:
            values = {}

        # NOTE(review): values and contexts are inserted into the query
        # unescaped; confirm whether callers pass URL-encoded strings before
        # adding quoting here.
        querys = []
        for field, value in values.items():
            searchField = fieldToEscidoc.get(field,None)
            if searchField is None:
                logging.debug("search, don't know field: %s"%field)
                continue

            if value is None or value == '':
                continue
            logging.debug("%s=%s"%(field,value))
            if not exact:
                value=value+"*"

            querys.append("%s=%%22%s%%22"%(searchField,value))

        query="+AND+".join(querys)

        if contexts: # restrict to contexts
            # Accept a single id given as byte or unicode string.
            if isinstance(contexts, (str, type(u""))):
                contexts=[contexts]

            ctxquery="+OR+".join(["escidoc.context.objid=%%22%s%%22"%(context)
                                  for context in contexts])

            if query!="":
                # Same "+AND+" separator as between the field queries.
                query="%s+AND+(%s)"%(query,ctxquery)
            else:
                query="(%s)"%ctxquery

        url = self._buildURL(query,limit)
        try:
            root = self._fetchRoot(url)
        except ET.ParseError:
            logging.error("Couldn't parse content of:%s"%url)
            return {}

        ret={}
        for item in root.findall(self._itemXPath):
            ret[item.get('objid')]=self._textOf(item.find(self._citationXPath))
        return ret

    def getEntryFromPubman(self,escidocid):
        """Get one entry.

        Returns the text of the first bibliographicCitation element found for
        ``escidoc.objid=<escidocid>`` (surrounding whitespace of the id is
        stripped), or ``""`` if the response contains none.
        """
        escidocid=escidocid.strip()
        root = self._fetchRoot(self._buildURL("escidoc.objid="+escidocid))
        return self._textOf(root.find(self._citationXPath))

    def pubmanConnectorURL(self):
        # Return the base URL of the search/export interface.
        # (Comment instead of docstring: the original method had none.)
        return self.connectorString

    def getPreprintsFromContext(self,context,limit=None,publicationType=None):
        """Get all publications of the context ``context``.

        When ``publicationType`` is given the query is additionally
        restricted to that ``escidoc.publication.type``; ``limit`` is sent as
        ``maximumRecords``.

        Returns a list of ``(objid, info)`` tuples, where ``info`` is a dict
        with the keys "citation", "volume", "link", "abstracts" (language ->
        text), "authors" (list of ``(family name, given name)``), "title" and
        "year". Missing elements yield ``""``. The list is sorted by volume
        (the preprint number) in descending numeric order; entries whose
        volume is not an integer come last in their original order.
        """
        cql = "escidoc.context.objid=%22"+context+"%22"
        if publicationType is not None:
            cql +="%20and%20escidoc.publication.type=%22"+publicationType+"%22"

        root = self._fetchRoot(self._buildURL(cql,limit))

        abstractpath=".//{http://purl.org/dc/terms/}abstract"
        issuedpath=".//{http://purl.org/dc/terms/}issued"

        creatorpath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/publication}creator/{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}person"
        familyNamepath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}family-name"
        givenNamepath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}given-name"

        titlepath=".//{http://purl.org/dc/elements/1.1/}title"

        srcpath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/publication}source"
        volumepath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}volume"

        # Only components whose content is stored as an external URL.
        linkspath=""".//{http://www.escidoc.de/schemas/components/0.8}component/{http://www.escidoc.de/schemas/components/0.8}content[@storage="external-url"]"""

        ret=[]
        for item in root.findall(self._itemXPath):
            objId = item.get('objid')

            # The volume of the source is the preprint id, e.g.
            #   <publication:source type="series">
            #     <escidoc:volume>437</escidoc:volume>
            src = item.find(srcpath)
            vol = src.find(volumepath) if src is not None else None

            # Link to the full text, taken from the xlink:href attribute of
            # the matching <escidocComponents:content> element.
            linkTag = item.find(linkspath)
            if linkTag is not None:
                link=linkTag.get("{http://www.w3.org/1999/xlink}href")
            else:
                link =""

            # One abstract per language, e.g. <dcterms:abstract xml:lang="deu">
            abstractTexts={}
            for abstract in item.findall(abstractpath):
                lang = abstract.get("{http://www.w3.org/XML/1998/namespace}lang")
                abstractTexts[lang]=abstract.text

            authors=[(self._textOf(author.find(familyNamepath)),
                      self._textOf(author.find(givenNamepath)))
                     for author in item.findall(creatorpath)]

            ret.append((objId,{"citation":self._textOf(item.find(self._citationXPath)),
                               "volume":self._textOf(vol),
                               "link":link,
                               "abstracts":abstractTexts,
                               "authors":authors,
                               "title":self._textOf(item.find(titlepath)),
                               "year":self._textOf(item.find(issuedpath))}))

        def volumeKey(entry): # sort by preprint number
            try:
                return (1, int(entry[1]["volume"]))
            except (TypeError, ValueError):
                # Non-numeric or missing volume: sorts after all numbered ones.
                return (0, 0)

        ret.sort(key=volumeKey, reverse=True)
        return ret
|
|
387
|
|
388
|
|
389
|
0
|
390
|
|
def manage_addZopePubmanConnectorForm(self):
    """Render the form for adding a ZopePubmanConnector."""
    return zptFile(self, 'zpt/AddZopePubmanConnector.zpt')()
|
|
395
|
|
396
|
|
def manage_addZopePubmanConnector(self,id,title,pubmanURL,RESPONSE=None):
    """Create a ZopePubmanConnector and store it in this container under ``id``.

    When ``RESPONSE`` is given it is redirected to ``manage_main`` afterwards.
    """
    self._setObject(id, ZopePubmanConnector(id, title, pubmanURL))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
|
|
407 |