annotate zopePubmanConnector.py @ 3:602b6e46b176

error handling bei falscher suche
author dwinter
date Tue, 30 Apr 2013 18:34:18 +0200
parents 9dbb9354abbe
children f845502cf73a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
373e6610e290 initial
dwinter
parents:
diff changeset
1 # -*- coding: utf-8 -*-
373e6610e290 initial
dwinter
parents:
diff changeset
2
373e6610e290 initial
dwinter
parents:
diff changeset
3 #Verbindet Zope mit pubman.
373e6610e290 initial
dwinter
parents:
diff changeset
4
373e6610e290 initial
dwinter
parents:
diff changeset
5
373e6610e290 initial
dwinter
parents:
diff changeset
6 from OFS.SimpleItem import SimpleItem
373e6610e290 initial
dwinter
parents:
diff changeset
7 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
373e6610e290 initial
dwinter
parents:
diff changeset
8 import os.path
373e6610e290 initial
dwinter
parents:
diff changeset
9
373e6610e290 initial
dwinter
parents:
diff changeset
10 from Globals import package_home
373e6610e290 initial
dwinter
parents:
diff changeset
11 import httplib2
373e6610e290 initial
dwinter
parents:
diff changeset
12 import urlparse
373e6610e290 initial
dwinter
parents:
diff changeset
13 import urllib
373e6610e290 initial
dwinter
parents:
diff changeset
14 import re
373e6610e290 initial
dwinter
parents:
diff changeset
15 import xml.etree.ElementTree as ET
373e6610e290 initial
dwinter
parents:
diff changeset
16 import json
1
c6478f155400 id eingebaut
dwinter
parents: 0
diff changeset
17 import logging
0
373e6610e290 initial
dwinter
parents:
diff changeset
18
373e6610e290 initial
dwinter
parents:
diff changeset
def zptFile(self, path, orphaned=False):
    """Return a page template file that ships with this product.

    path is interpreted relative to the product's package directory.
    Unless orphaned is true, the template is acquisition-wrapped in self
    (via __of__) before it is returned.
    """
    template = PageTemplateFile(os.path.join(package_home(globals()), path))
    if not orphaned:
        # usual case: bind the template to the calling object
        template = template.__of__(self)
    return template
373e6610e290 initial
dwinter
parents:
diff changeset
28
373e6610e290 initial
dwinter
parents:
diff changeset
class ZopePubmanConnector(SimpleItem):
    # Zope item that queries the PubMan "SearchAndExport" web service and
    # returns the citation snippets found in its XML responses.
    #
    # NOTE: the class and the methods that had no docstring originally are
    # documented with comments on purpose. Zope 2's publisher takes the
    # presence of a docstring into account when deciding whether something
    # may be published through the web, so adding docstrings could change
    # what is reachable by URL.

    # Base URL of the search service; the query string is appended directly.
    connectorString="http://pubman.mpiwg-berlin.mpg.de/search/SearchAndExport?"

    meta_type="ZopePubmanConnector"

    manage_options= ({'label':'Main Config','action': 'changeMain'},) + SimpleItem.manage_options

    # Export parameters that every request sends after the cqlQuery.
    _exportParams="exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"

    # ElementTree paths (Clark notation) for the result items and for the
    # citation element inside an item.
    _itemXPath=".//{http://www.escidoc.de/schemas/item/0.8}item"
    _citationXPath=".//{http://purl.org/dc/terms/}bibliographicCitation"

    def __init__(self,id,title,pubmanURL):
        # pubmanURL: URL of a PubMan instance or of a collection, in case the
        # default collection should not be used.
        # NOTE(review): none of the query methods below reads self.pubmanURL;
        # they all build their URLs from connectorString. Confirm whether
        # that is intended.
        self.id=id
        self.title=title
        self.pubmanURL=pubmanURL

    def changeMain(self,pubmanURL=None,title=None,REQUEST=None,RESPONSE=None):
        """Change the main settings, or render the change form.

        With a non-empty pubmanURL the settings are stored and, if a RESPONSE
        is available, the browser is redirected to manage_main. Without it
        the change form is rendered and returned.

        NOTE(review): the nesting was reconstructed so that the form branch
        belongs to the pubmanURL test - confirm against the repository.
        """
        if not pubmanURL:
            return zptFile(self, 'zpt/ChangeZopePubmanConnector.zpt')()

        self.pubmanURL=pubmanURL
        if title is not None:
            # do not wipe the stored title when only the URL is passed
            self.title=title

        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

    def _quote(self, value):
        # Percent-encode a caller-supplied value before it is embedded in the
        # request URL, so that spaces, '&', '#', '%' ... cannot break the URL.
        # '/', ':' and '*' are left alone so that identifiers such as
        # "escidoc:630782" and the wildcard appended by search() are sent
        # exactly as before.
        if not isinstance(value, str):
            # Python 2 unicode: urllib.quote needs a byte string
            value = value.encode("utf-8")
        return urllib.quote(value, safe="/:*")

    def _fetchRoot(self, url):
        # Request url and return the root element of the XML response, or
        # None (after logging an error) when the response is not parsable XML.
        logging.debug(url)
        _resp, content = httplib2.Http().request(url)
        ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
        try:
            return ET.fromstring(content)
        except ET.ParseError:
            logging.error("Couldn't parse content of:%s", url)
            return None

    def _citations(self, root):
        # Return a list of (objid, citation text) pairs, one per item element
        # below root, in document order.
        pairs = []
        for item in root.findall(self._itemXPath):
            #<escidocItem:item objid="escidoc:630782"
            node = item.find(self._citationXPath)
            # an item without a citation element yields "" instead of
            # failing with an AttributeError on None
            text = node.text if node is not None else ""
            pairs.append((item.get('objid'), text))
        return pairs

    def getPublications(self,personID,limit=None,publicationType=None):
        """Get all publications of the person identified by personID.

        limit, if true, is sent as maximumRecords; publicationType, if not
        None, restricts the query to that escidoc.publication.type.

        Returns a list of (objid, citation text) tuples. An unparsable
        response is logged and yields an empty list.
        """
        query = "escidoc.any-identifier=%22" + self._quote(personID) + "%22"
        if publicationType is not None:
            query += "%20and%20escidoc.publication.type=%22" + self._quote(publicationType) + "%22"

        cn = self.connectorString + "cqlQuery=" + query + "&" + self._exportParams
        if limit:
            cn += "&maximumRecords=%s" % limit

        root = self._fetchRoot(cn)
        if root is None:
            return []
        return self._citations(root)

    def search(self,values,exact=False):
        """Search PubMan.

        values maps field names ("title", "author", "any") to search values;
        unknown fields and empty values are skipped. Unless exact is true, a
        trailing "*" wildcard is appended to every value.

        Returns a map of escidoc id -> XML-formatted snippet. It is empty
        when no usable field was given or the response could not be parsed.
        """
        fieldToEscidoc={"title":"escidoc.any-title",
                        "author":"escidoc.publication.any.publication-creator-names",
                        "any":"escidoc.any-metadata"}

        querys = []
        # values only has to offer keys() and item access
        for field in values.keys():
            searchField = fieldToEscidoc.get(field)
            if searchField is None:
                logging.debug("search, don't know field: %s", field)
                continue

            value = values[field]
            if value == '':
                continue
            logging.debug("%s=%s", field, value)
            if not exact:
                value = value + "*"

            querys.append("%s=%%22%s%%22" % (searchField, self._quote(value)))

        if not querys:
            # nothing to search for; an empty cqlQuery is not worth a request
            return {}

        # the boolean operator must be URL-encoded as well - a raw " AND "
        # would put literal spaces into the request URL
        query = "%20AND%20".join(querys)
        cn = self.connectorString + "cqlQuery=" + query + "&" + self._exportParams

        root = self._fetchRoot(cn)
        if root is None:
            return {}
        return dict(self._citations(root))

    def getEntryFromPubman(self,escidocid):
        # Return the citation text of the entry with the given escidoc object
        # id, or "" when the response has no citation element or cannot be
        # parsed. Surrounding whitespace of the id is ignored.
        escidocid = escidocid.strip()
        cn = self.connectorString + "cqlQuery=escidoc.objid=" + self._quote(escidocid) + "&" + self._exportParams

        root = self._fetchRoot(cn)
        if root is None:
            return ""

        citation = root.find(self._citationXPath)
        if citation is not None:
            return citation.text

        return ""

    def pubmanConnectorURL(self):
        # Return the base URL that all queries are sent to.
        return self.connectorString
373e6610e290 initial
dwinter
parents:
diff changeset
203
373e6610e290 initial
dwinter
parents:
diff changeset
def manage_addZopePubmanConnectorForm(self):
    """Render the form for adding a ZopePubmanConnector."""
    return zptFile(self, 'zpt/AddZopePubmanConnector.zpt')()
373e6610e290 initial
dwinter
parents:
diff changeset
208
373e6610e290 initial
dwinter
parents:
diff changeset
209
373e6610e290 initial
dwinter
parents:
diff changeset
def manage_addZopePubmanConnector(self,id,title,pubmanURL,RESPONSE=None):
    """Create a ZopePubmanConnector and store it in self under the given id.

    If a RESPONSE is passed, the browser is redirected to manage_main
    afterwards.
    """
    self._setObject(id, ZopePubmanConnector(id, title, pubmanURL))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
373e6610e290 initial
dwinter
parents:
diff changeset
220