comparison zopePubmanConnector.py @ 12:55294833888f

fix error without network connection.
author casties
date Thu, 23 May 2013 14:16:47 +0200
parents 69a2f0b8a932
children 43849c9cc08b
comparison
equal deleted inserted replaced
11:69a2f0b8a932 12:55294833888f
104 104
105 return ret 105 return ret
106 106
107 107
108 def search(self,values={},exact=False,limit=None,contexts=None): 108 def search(self,values={},exact=False,limit=None,contexts=None):
109
110 """search pubman 109 """search pubman
111 @values map mit field->value 110 @values map mit field->value
112 @return map mit escidocId -> XML-formatted snippeds 111 @return map mit escidocId -> XML-formatted snippeds
113 """ 112 """
114 113
120 cn = self.connectorString+"cqlQuery=%s&" 119 cn = self.connectorString+"cqlQuery=%s&"
121 cn +="exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending" 120 cn +="exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"
122 121
123 if limit: 122 if limit:
124 cn+="&maximumRecords=%s"%limit 123 cn+="&maximumRecords=%s"%limit
125
126
127
128 124
129 querys = [] 125 querys = []
130 for field in values.keys(): 126 for field in values.keys():
131 127
132 searchField = fieldToEscidoc.get(field,None) 128 searchField = fieldToEscidoc.get(field,None)
133 if searchField is None: 129 if searchField is None:
134 logging.debug("search, don't know field: %s"%field) 130 logging.debug("search, don't know field: %s"%field)
135 continue 131 continue
136
137 132
138 value = values[field] 133 value = values[field]
139 try: 134 try:
140 value=unicodedata.normalize('NFKD', value).encode('ASCII', 'ignore') 135 value=unicodedata.normalize('NFKD', value).encode('ASCII', 'ignore')
141 except: 136 except:
143 if value == '': 138 if value == '':
144 continue 139 continue
145 logging.debug("%s=%s"%(field,value)) 140 logging.debug("%s=%s"%(field,value))
146 if not exact: 141 if not exact:
147 value=value+"*" 142 value=value+"*"
148
149 143
150 querys.append("%s=%%22%s%%22"%(searchField,value)) 144 querys.append("%s=%%22%s%%22"%(searchField,value))
151 145
152
153 query="%20AND%20".join(querys) 146 query="%20AND%20".join(querys)
154
155 147
156 if contexts: # einscbraenken auf contexte 148 if contexts: # einscbraenken auf contexte
157 149
158 if isinstance(contexts, str): 150 if isinstance(contexts, str):
159 contexts=[contexts] 151 contexts=[contexts]
167 if query!="": 159 if query!="":
168 query=query+"AND%%20(%s)"%ctxquery 160 query=query+"AND%%20(%s)"%ctxquery
169 else: 161 else:
170 query="(%s)"%ctxquery 162 query="(%s)"%ctxquery
171 163
172 h = httplib2.Http(cacheFolder) 164 try:
173 165 h = httplib2.Http(cacheFolder)
174 logging.debug(cn%query) 166 logging.debug("search: "+cn%query)
175 resp, content = h.request(cn%query) 167 resp, content = h.request(cn%query)
168 except:
169 logging.error("Unable to get data from PubMan!")
170 return {}
176 171
177 ET.register_namespace("dcterms", "http://purl.org/dc/terms/") 172 ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
178 173
179 try: 174 try:
180 root = ET.fromstring(content) 175 root = ET.fromstring(content)
189 citations=root.findall(objxpath) 184 citations=root.findall(objxpath)
190 185
191 ret={} 186 ret={}
192 for citation in citations: 187 for citation in citations:
193 objId = citation.get('objid') 188 objId = citation.get('objid')
194
195 text = citation.find(citationxpath) 189 text = citation.find(citationxpath)
196
197 ret[objId]=text.text 190 ret[objId]=text.text
198
199
200 191
201 return ret 192 return ret
202 193
203 194
204 def getEntriesFromPubman(self,escidocids): 195 def getEntriesFromPubman(self,escidocids):
289 cn +=")&exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending" 280 cn +=")&exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"
290 if limit: 281 if limit:
291 cn+="&maximumRecords=%s"%limit 282 cn+="&maximumRecords=%s"%limit
292 283
293 startTime = time.time() 284 startTime = time.time()
294 logging.debug("getPublicationsFromContext: getting %s"%cn) 285 try:
295 resp, content = h.request(cn) 286 logging.debug("getPublicationsFromContext: getting %s"%cn)
296 logging.debug("getPublicationsFromContext: got data in %ss"%(time.time()-startTime)) 287 resp, content = h.request(cn)
288 logging.debug("getPublicationsFromContext: got data in %ss"%(time.time()-startTime))
297 289
298 ET.register_namespace("dcterms", "http://purl.org/dc/terms/") 290 ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
299 291
300 root = ET.fromstring(content) 292 root = ET.fromstring(content)
301 293
294 except Exception, e:
295 logging.error("Unable to read and parse data! %s"%e)
296 return []
297
302 #<escidocItem:item objid="escidoc:630782" 298 #<escidocItem:item objid="escidoc:630782"
303 299
304 citationxpath=".//{http://purl.org/dc/terms/}bibliographicCitation" 300 citationxpath=".//{http://purl.org/dc/terms/}bibliographicCitation"
305 abstractpath=".//{http://purl.org/dc/terms/}abstract" 301 abstractpath=".//{http://purl.org/dc/terms/}abstract"
306 issuedpath=".//{http://purl.org/dc/terms/}issued" 302 issuedpath=".//{http://purl.org/dc/terms/}issued"