annotate zopePubmanConnector.py @ 24:345dd913f520 default tip

new pubman
author Dirk Wintergrün <dwinter@mpiwg-berlin.mpg.de>
date Fri, 10 Jan 2014 12:43:43 +0100
parents d24a8673d68e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
373e6610e290 initial
dwinter
parents:
diff changeset
1 # -*- coding: utf-8 -*-
373e6610e290 initial
dwinter
parents:
diff changeset
2
373e6610e290 initial
dwinter
parents:
diff changeset
3 #Verbindet Zope mit pubman.
373e6610e290 initial
dwinter
parents:
diff changeset
4
373e6610e290 initial
dwinter
parents:
diff changeset
5
373e6610e290 initial
dwinter
parents:
diff changeset
6 from OFS.SimpleItem import SimpleItem
373e6610e290 initial
dwinter
parents:
diff changeset
7 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
373e6610e290 initial
dwinter
parents:
diff changeset
8 import os.path
373e6610e290 initial
dwinter
parents:
diff changeset
9
373e6610e290 initial
dwinter
parents:
diff changeset
10 from Globals import package_home
373e6610e290 initial
dwinter
parents:
diff changeset
11 import httplib2
373e6610e290 initial
dwinter
parents:
diff changeset
12 import xml.etree.ElementTree as ET
1
c6478f155400 id eingebaut
dwinter
parents: 0
diff changeset
13 import logging
9
166482535b54 timing output for debugging.
casties
parents: 8
diff changeset
14 import time
10
007ba22a5eb1 umlaut suche jetzt normalisiert
dwinter
parents: 9
diff changeset
15 import unicodedata
6
49abb91d6c6a getPreprints gibt nun hash mit metadate zurueck
dwinter
parents: 5
diff changeset
16
16
3e154b154b6f timeouts
dwinter
parents: 15
diff changeset
# Timeout handed to every httplib2.Http instance created in this module.
TIMEOUT = 10

# Directory handed to httplib2.Http as its cache location.
cacheFolder = "/var/tmp/.cacheWWW"

# Prefix -> namespace URI map used for the prefixed ElementTree paths
# (find/findall) in this module.
ns = {
    'dc': 'http://purl.org/dc/elements/1.1/',
    'escidocComponents': 'http://www.escidoc.de/schemas/components/0.8',
    'escidocItem': 'http://www.escidoc.de/schemas/item/0.8',
    'escidocMetadataProfile': "http://escidoc.mpg.de/metadataprofile/schema/0.1/",
    'escidocMetadataRecords': "http://www.escidoc.de/schemas/metadatarecords/0.4",
    'srel': 'http://escidoc.de/core/01/structural-relations/',
}
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
28
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
29
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
30
0
373e6610e290 initial
dwinter
parents:
diff changeset
def zptFile(self, path, orphaned=False):
    """Return a page template file that ships with this product.

    path -- template location relative to the product's package directory
    orphaned -- if true, return the bare PageTemplateFile; otherwise the
        template is bound to ``self`` through ``__of__`` before it is returned
    """
    template = PageTemplateFile(os.path.join(package_home(globals()), path))
    if orphaned:
        # unusual case: caller wants the template without a context object
        return template
    return template.__of__(self)
373e6610e290 initial
dwinter
parents:
diff changeset
40
373e6610e290 initial
dwinter
parents:
diff changeset
41 class ZopePubmanConnector(SimpleItem):
373e6610e290 initial
dwinter
parents:
diff changeset
42
373e6610e290 initial
dwinter
parents:
diff changeset
43
24
345dd913f520 new pubman
Dirk Wintergr?n <dwinter@mpiwg-berlin.mpg.de>
parents: 23
diff changeset
44 #connectorString="http://pubman.mpiwg-berlin.mpg.de/search/SearchAndExport?"
0
373e6610e290 initial
dwinter
parents:
diff changeset
45
373e6610e290 initial
dwinter
parents:
diff changeset
46
373e6610e290 initial
dwinter
parents:
diff changeset
47 meta_type="ZopePubmanConnector"
373e6610e290 initial
dwinter
parents:
diff changeset
48
373e6610e290 initial
dwinter
parents:
diff changeset
49 manage_options= ({'label':'Main Config','action': 'changeMain'},) + SimpleItem.manage_options
373e6610e290 initial
dwinter
parents:
diff changeset
50
373e6610e290 initial
dwinter
parents:
diff changeset
def __init__(self,id,title,pubmanURL):
    """Store id, title and the PubMan base URL on the new connector.

    pubmanURL -- URL of a PubMan instance, or of a collection when the
        default collection should not be used. The query methods append
        ``cqlQuery=...`` directly to this string.
    """
    self.id, self.title = id, title
    self.pubmanURL = pubmanURL
373e6610e290 initial
dwinter
parents:
diff changeset
55
373e6610e290 initial
dwinter
parents:
diff changeset
56
373e6610e290 initial
dwinter
parents:
diff changeset
57
373e6610e290 initial
dwinter
parents:
diff changeset
def changeMain(self,pubmanURL=None,title=None,REQUEST=None,RESPONSE=None):
    """Change the main settings, or show the settings form.

    With a non-empty pubmanURL the URL and the title are stored and, if a
    RESPONSE object was passed, the browser is redirected to ``manage_main``.
    Without a pubmanURL the rendered form template
    ``zpt/ChangeZopePubmanConnector.zpt`` is returned.
    """
    # NOTE(review): nesting rebuilt from a listing without indentation; the
    # form branch is taken to belong to the pubmanURL check -- confirm.
    if not pubmanURL:
        form = zptFile(self, 'zpt/ChangeZopePubmanConnector.zpt')
        return form()

    self.pubmanURL = pubmanURL
    self.title = title
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
373e6610e290 initial
dwinter
parents:
diff changeset
71
373e6610e290 initial
dwinter
parents:
diff changeset
72
2
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
def getPublications(self,personID,limit=None,publicationType=None):
    """Return the publications PubMan lists for one person.

    A CQL query on ``escidoc.publication.creator.person.identifier`` is
    appended to ``self.pubmanURL`` (optionally combined with a restriction on
    ``escidoc.publication.type``), the APA snippet export is requested sorted
    by ``escidoc.any-dates`` in descending order, and the XML answer is parsed.

    personID -- identifier of the person, inserted verbatim into the query
    limit -- if truthy, sent as ``maximumRecords``
    publicationType -- if not None, only publications of this type are requested

    Returns a list with one tuple per item:
    ``(objId, citation, bookID, linksIdentifier, linksLocator)`` where
    citation is the text of the bibliographicCitation element (None if the
    element is missing), bookID is the first ``MPIWG-Book:`` identifier or
    None, linksIdentifier is a list of URI identifiers and linksLocator a list
    of ``(title, link, storage)`` tuples read from the component content
    attributes. Returns [] when the request or the parsing fails (the error
    is logged).
    """
    # attribute that carries the identifier kind on dc:identifier elements
    xsiType = "{http://www.w3.org/2001/XMLSchema-instance}type"
    citationxpath = ".//{http://purl.org/dc/terms/}bibliographicCitation"
    objxpath = ".//{http://www.escidoc.de/schemas/item/0.8}item"
    idTermPath = (".//escidocMetadataRecords:md-records"
                  "/escidocMetadataRecords:md-record"
                  "/escidocMetadataProfile:publication/dc:identifier")
    componentsPath = ".//escidocComponents:components[1]"

    # NOTE(review): personID and publicationType go into the URL unescaped;
    # callers have to pass URL-safe values.
    if publicationType is None:
        cn = self.pubmanURL+"cqlQuery=escidoc.publication.creator.person.identifier=%22"+personID+"%22&"
    else:
        cn = self.pubmanURL+"cqlQuery=%28escidoc.publication.creator.person.identifier=%22"+personID+"%22%29"
        cn += "%20and%28%20escidoc.publication.type=%22"+publicationType+"%22%29&"

    cn += "exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"
    if limit:
        cn += "&maximumRecords=%s"%limit

    logging.debug(cn)
    try:
        # created inside the try so that a failure here is logged as well
        h = httplib2.Http(cacheFolder,timeout=TIMEOUT)
        resp, content = h.request(cn)
        ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
        root = ET.fromstring(content)
    except Exception as e:
        logging.error("Error getting and parsing data from PubMan: %s"%e)
        return []

    # every <escidocItem:item objid="escidoc:..."> is one publication
    items = root.findall(objxpath)
    logging.debug(len(items))

    ret = []
    for item in items:
        objId = item.get('objid')

        citation = item.find(citationxpath)
        # an item without a citation element yields None instead of raising
        citationText = citation.text if citation is not None else None

        linksIdentifier = []
        bookID = None
        for idterm in item.findall(idTermPath, ns):
            idType = idterm.get(xsiType, '')
            # empty identifier elements have text None
            idValue = (idterm.text or "").strip()
            if idType in ('eterms:OTHER', 'eidt:OTHER'):
                # look for the book ID
                logging.debug("zopePubmanConnector: %s", idterm.text)
                if idValue.startswith("MPIWG-Book:"):
                    bookID = idValue
                    # NOTE(review): identifiers after the book ID are not
                    # inspected, so later URI identifiers are not collected.
                    break
            elif idType in ('eterms:URI', 'eidt:URI'):
                linksIdentifier.append(idValue)

        linksLocator = []
        for component in item.findall(componentsPath, ns):
            cnt = component.find(".//escidocComponents:content", ns)
            if cnt is None:
                continue
            link = title = storage = ""
            # attribute names are namespace-qualified, so match on the suffix
            for name, value in cnt.items():
                if name.endswith("href"):
                    link = value
                elif name.endswith("title"):
                    title = value
                elif name.endswith("storage"):
                    storage = value
            linksLocator.append((title, link, storage))

        ret.append((objId, citationText, bookID, linksIdentifier, linksLocator))

    return ret
373e6610e290 initial
dwinter
parents:
diff changeset
170
2
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
171
19
38ff05179d71 search got extra perameter searchkeys
dwinter
parents: 18
diff changeset
def search(self,values=None,exact=False,limit=None,contexts=None,resultWithContext=False,sortKeys="escidoc.any-dates"):
    """Search PubMan.

    values -- map of field -> value; known fields are ``title``, ``author``
        and ``any``, unknown fields are skipped. Values are reduced to ASCII
        (NFKD normalisation, non-ASCII characters dropped); values that end
        up empty are skipped.
    exact -- if false, ``*`` is appended to every value
    limit -- if truthy, sent as ``maximumRecords``
    contexts -- one context objid or an iterable of objids; results are
        restricted to items in any of them
    resultWithContext -- if true, each result is a ``(citation, contextId)``
        tuple instead of the citation alone
    sortKeys -- value of the ``sortKeys`` request parameter

    Returns a map of escidoc objid -> XML-formatted citation snippet (or the
    tuple described above). A missing citation or context element yields None
    in its place. Returns {} when the request or the parsing fails (the error
    is logged).
    """
    fieldToEscidoc = {"title": "escidoc.any-title",
                      "author": "escidoc.publication.any.publication-creator-names",
                      "any": "escidoc.metadata"}
    citationxpath = ".//{http://purl.org/dc/terms/}bibliographicCitation"
    objxpath = ".//{http://www.escidoc.de/schemas/item/0.8}item"
    ctxPath = ".//escidocItem:properties/srel:context"
    # byte strings and unicode strings both count as a single context
    stringTypes = (str, type(u""))

    # NOTE(review): values and contexts go into the URL unescaped; a value
    # containing spaces, '&' or '"' changes the request -- consider quoting.
    querys = []
    for field, value in (values or {}).items():
        searchField = fieldToEscidoc.get(field)
        if searchField is None:
            logging.debug("search, don't know field: %s", field)
            continue

        if isinstance(value, bytes):
            # unicodedata only accepts text; byte strings are taken as UTF-8
            value = value.decode('utf-8')
        value = unicodedata.normalize('NFKD', value).encode('ASCII', 'ignore')
        if not isinstance(value, str):
            # Python 3: encode() returned bytes, go back to text
            value = value.decode('ascii')
        if value == '':
            continue
        logging.debug("%s=%s", field, value)
        if not exact:
            value = value+"*"

        querys.append("%s=%%22%s%%22"%(searchField,value))

    query = "%20AND%20".join(querys)

    if contexts: # restrict to contexts
        if isinstance(contexts, stringTypes):
            contexts = [contexts]
        ctxquery = "%20OR%20".join(["escidoc.context.objid=%%22%s%%22"%(context) for context in contexts])
        if query != "":
            # encoded blank in front of AND keeps it apart from the closing quote
            query = query+"%20AND%20("+ctxquery+")"
        else:
            query = "("+ctxquery+")"

    # plain concatenation: a '%' in pubmanURL or sortKeys must not be treated
    # as a format directive
    cn = self.pubmanURL+"cqlQuery="+query+"&"
    cn += "exportFormat=APA&outputFormat=snippet&language=all&sortKeys="+sortKeys+"&sortOrder=descending"
    if limit:
        cn += "&maximumRecords=%s"%limit

    try:
        h = httplib2.Http(cacheFolder,timeout=TIMEOUT)
        logging.debug("search: "+cn)
        resp, content = h.request(cn)
    except Exception:
        logging.error("Unable to get data from PubMan!")
        return {}

    ET.register_namespace("dcterms", "http://purl.org/dc/terms/")

    try:
        root = ET.fromstring(content)
    except Exception:
        logging.error("Couldn't parse content of:%s"%cn)
        return {}

    # every <escidocItem:item objid="escidoc:..."> is one hit
    ret = {}
    for item in root.findall(objxpath):
        objId = item.get('objid')
        citation = item.find(citationxpath)
        citationText = citation.text if citation is not None else None

        if resultWithContext:
            ctx = item.find(ctxPath, ns)
            ctxId = ctx.get('objid') if ctx is not None else None
            ret[objId] = (citationText, ctxId)
        else:
            ret[objId] = citationText

    return ret
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
265
11
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
266
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
def getEntriesFromPubman(self,escidocids):
    """Fetch several entries from PubMan and group them by type.

    escidocids -- iterable of objects with an ``escidocid`` attribute; each
        id is looked up with ``getEntryFromPubman(id, True)``

    Returns a dict that maps the second value returned by
    getEntryFromPubman (presumably the publication type) to a list of entry
    dicts with the keys ``citation``, ``escidocId``, ``bookId``,
    ``linksIdentifier`` and ``linksLocator``.
    """
    doctypes = {}
    for escidocid in escidocids:
        result = tuple(self.getEntryFromPubman(escidocid.escidocid, True))
        if len(result) < 5:
            # getEntryFromPubman returns only ("", "") when the request or
            # the parsing failed; pad with empty strings (the same
            # placeholders it uses for an item it cannot find) so that one
            # bad entry does not abort the whole list
            result += ("",) * (5 - len(result))
        txt, docType, bookID, linksIdentifier, linksLocator = result

        entry = {
            'citation': txt,
            'escidocId': escidocid.escidocid,
            'bookId': bookID,
            'linksIdentifier': linksIdentifier,
            'linksLocator': linksLocator,
        }
        doctypes.setdefault(docType, []).append(entry)

    return doctypes
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
287
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
288
18
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
289 def getEntryFromPubman(self,escidocid,extendedData=None,withContext=False):
4
f845502cf73a preprint methoden hinzugefuegt
dwinter
parents: 3
diff changeset
290 """get one entry"""
f845502cf73a preprint methoden hinzugefuegt
dwinter
parents: 3
diff changeset
291
14
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
292
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
293
3
602b6e46b176 error handling bei falscher suche
dwinter
parents: 2
diff changeset
294 escidocid=escidocid.lstrip().strip()
17
48c4a6f3b135 minor bug, handliung withdrawn entrie
dwinter
parents: 16
diff changeset
295 h = httplib2.Http(cacheFolder,timeout=TIMEOUT)
24
345dd913f520 new pubman
Dirk Wintergr?n <dwinter@mpiwg-berlin.mpg.de>
parents: 23
diff changeset
296 cn = self.pubmanURL+"cqlQuery=escidoc.objid=%s&"
2
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
297 cn +="exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"
23
d24a8673d68e trying to fix bug in error message.
casties
parents: 22
diff changeset
298 content = None
18
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
299 try:
22
2abc89d58140 more error handling
casties
parents: 21
diff changeset
300 resp, content = h.request(cn%escidocid)
2abc89d58140 more error handling
casties
parents: 21
diff changeset
301 ET.register_namespace("dcterms", "http://purl.org/dc/terms/")
2abc89d58140 more error handling
casties
parents: 21
diff changeset
302 logging.debug(cn%escidocid)
18
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
303 root = ET.fromstring(content)
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
304 except:
22
2abc89d58140 more error handling
casties
parents: 21
diff changeset
305 logging.error("zopePubmanConnector: cannot parse: %s"%content)
23
d24a8673d68e trying to fix bug in error message.
casties
parents: 22
diff changeset
306 return "",""
2
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
307
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
308 citationxpath=".//{http://purl.org/dc/terms/}bibliographicCitation"
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
309
14
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
310 itempath = ".//escidocItem:item"
17
48c4a6f3b135 minor bug, handliung withdrawn entrie
dwinter
parents: 16
diff changeset
311
14
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
312
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
313 item = root.find(itempath,ns) #get item
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
314
17
48c4a6f3b135 minor bug, handliung withdrawn entrie
dwinter
parents: 16
diff changeset
315 if item is None:
48c4a6f3b135 minor bug, handliung withdrawn entrie
dwinter
parents: 16
diff changeset
316 logging.error("pubman connector: cannot find %s"%escidocid)
48c4a6f3b135 minor bug, handliung withdrawn entrie
dwinter
parents: 16
diff changeset
317 return escidocid,"","","",""
48c4a6f3b135 minor bug, handliung withdrawn entrie
dwinter
parents: 16
diff changeset
318
14
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
319 citation=item.find(citationxpath,ns)
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
320
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
321
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
322 if citation is not None and extendedData is not None:
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
323
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
324 linksIdentifier=[]
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
325 linksLocator=[]
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
326
2
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
327
11
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
328
14
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
329 #get identifier
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
330 idTermPath =""".//escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/escidocMetadataProfile:publication/dc:identifier"""
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
331 #idTermPath =".//{http://purl.org/dc/elements/1.1/}identifier"
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
332
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
333 idterms = item.findall(idTermPath,ns)
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
334
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
335 bookID = None
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
336 logging.debug("zopePubmanConnector: %s"%idterms)
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
337 for idterm in idterms:
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
338
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
339 if idterm.get("{http://www.w3.org/2001/XMLSchema-instance}type",'') in ['eterms:OTHER','eidt:OTHER']: ##suche nach bookID
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
340 logging.debug("zopePubmanConnector: %s"%idterm.text)
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
341 checkID =idterm.text.lstrip().rstrip()
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
342 if checkID.startswith("MPIWG-Book:"):
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
343 bookID = checkID
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
344 break
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
345 elif idterm.get("{http://www.w3.org/2001/XMLSchema-instance}type",'') in ['eterms:URI','eidt:URI']:
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
346 linksIdentifier.append(idterm.text.lstrip().rstrip())
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
347
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
348
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
349 #get files and locators
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
350 componentsPath =""".//escidocComponents:components[1]"""
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
351
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
352 components=item.findall(componentsPath,ns);
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
353
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
354 for component in components:
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
355 cnt = component.find(".//escidocComponents:content",ns)
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
356 if cnt is not None:
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
357 link=""
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
358 title=""
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
359 type=""
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
360 for name,value in cnt.items():
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
361 if name.endswith("href"):
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
362 link=value
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
363 elif name.endswith("title"):
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
364 title=value
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
365 elif name.endswith("storage"):
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
366 type=value
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
367
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
368 linksLocator.append((title,link,type))
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
369
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
370
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
371
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
372
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
373
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
374
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
375
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
376
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
377
11
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
378 path = ".//escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/escidocMetadataProfile:publication"
14
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
379 publicationTag= item.find(path,ns);
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
380
d92decb037d6 bugs in publ
dwinter
parents: 13
diff changeset
381 return citation.text,publicationTag.get('type'),bookID,linksIdentifier,linksLocator
11
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
382
18
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
383
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
384 if citation is not None and withContext:
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
385 ctxPath=".//escidocItem:properties/srel:context"
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
386 ctx = item.find(ctxPath,ns)
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
387
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
388 return citation.text,ctx.get('objid')
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
389
2
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
390 if citation is not None:
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
391
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
392 return citation.text
11
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
393
18
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
394
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
395
da890cb0fd04 shows also context if wanted in getEntryFromPubman
dwinter
parents: 17
diff changeset
396
11
69a2f0b8a932 sortieren von treffer listen nach typen
dwinter
parents: 10
diff changeset
397 return "",''
2
9dbb9354abbe getPublications erweitert nach Typ
dwinter
parents: 1
diff changeset
398
0
373e6610e290 initial
dwinter
parents:
diff changeset
def pubmanConnectorURL(self):
    # Accessor for the PubMan search base URL stored on this connector.
    # NOTE(review): kept as a comment instead of a docstring on purpose --
    # Zope's publisher treats objects with docstrings as web-publishable,
    # so adding one could change how this method is exposed; confirm
    # before converting.
    url = self.pubmanURL
    return url
4
f845502cf73a preprint methoden hinzugefuegt
dwinter
parents: 3
diff changeset
401
f845502cf73a preprint methoden hinzugefuegt
dwinter
parents: 3
diff changeset
402
10
007ba22a5eb1 umlaut suche jetzt normalisiert
dwinter
parents: 9
diff changeset
def getPublicationsFromContext(self,context,limit=None,publicationType=None,search=None):
    """Return the publications of a PubMan context as a list of metadata maps.

    The PubMan search service at ``self.pubmanURL`` is queried with a CQL
    query (APA export, snippet output, sorted by date, descending) and
    every ``escidocItem:item`` of the answer is turned into one dict:

        "id"        --> objid of the item (e.g. "escidoc:630782")
        "citation"  --> citation text in APA formatting
        "volume"    --> volume of the publication source
        "link"      --> download link of the internally managed file,
                        "" if there is none or the component is private
        "abstracts" --> map from language code (e.g. deu/eng) to abstract
        "authors"   --> [(FAMILY_NAME, GIVEN_NAME), ...]
        "title"     --> title
        "year"      --> issued date

    Parameters:
        context         -- objid of the PubMan context to list
        limit           -- optional maximum number of records
        publicationType -- optional publication type to restrict to
        search          -- optional search term matched against the
                           metadata; it is reduced to plain ASCII
                           (umlauts etc. are normalised away)

    Returns [] if the data cannot be fetched or parsed.  Optional
    elements missing in a record yield "" instead of raising.
    """
    h = httplib2.Http(cacheFolder,timeout=TIMEOUT)

    # Both query variants share the context restriction; the publication
    # type is just an additional clause.
    cn = self.pubmanURL+"cqlQuery=(escidoc.context.objid=%22"+context+"%22"
    if publicationType is not None:
        cn +="%20and%20escidoc.publication.type=%22"+publicationType+"%22"

    if search is not None and search != "":
        try:
            search = unicodedata.normalize('NFKD', search).encode('ASCII', 'ignore')
        except TypeError:
            # byte string input (Python 2): decode first, then normalise
            search = unicodedata.normalize('NFKD', search.decode('utf-8')).encode('ASCII', 'ignore')
        if not isinstance(search, str):
            # encode() yields bytes on Python 3; no-op on Python 2
            search = search.decode('ASCII')
        cn+="%20and%20escidoc.metadata="+search

    cn +=")&exportFormat=APA&outputFormat=snippet&language=all&sortKeys=escidoc.any-dates&sortOrder=descending"
    if limit:
        cn+="&maximumRecords=%s"%limit

    startTime = time.time()
    try:
        logging.debug("getPublicationsFromContext: getting %s"%cn)
        resp, content = h.request(cn)
        logging.debug("getPublicationsFromContext: got data in %ss"%(time.time()-startTime))

        ET.register_namespace("dcterms", "http://purl.org/dc/terms/")

        root = ET.fromstring(content)

    except Exception as e:
        # network problems and unparsable answers both end up here
        logging.error("Unable to read and parse data! %s"%e)
        return []

    #<escidocItem:item objid="escidoc:630782"

    citationxpath=".//{http://purl.org/dc/terms/}bibliographicCitation"
    abstractpath=".//{http://purl.org/dc/terms/}abstract"
    issuedpath=".//{http://purl.org/dc/terms/}issued"

    creatorpath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/publication}creator/{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}person"
    familyNamepath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}family-name"
    givenNamepath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}given-name"

    titlepath=".//{http://purl.org/dc/elements/1.1/}title"

    objxpath=".//{http://www.escidoc.de/schemas/item/0.8}item"
    srcpath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/publication}source"
    volumepath=".//{http://escidoc.mpg.de/metadataprofile/schema/0.1/types}volume"

    # only files stored inside PubMan are offered as download link
    linkspath=""".//{http://www.escidoc.de/schemas/components/0.8}component/{http://www.escidoc.de/schemas/components/0.8}content[@storage="internal-managed"]"""
    visibility=""".//{http://www.escidoc.de/schemas/components/0.8}component/{http://www.escidoc.de/schemas/components/0.8}properties/{http://escidoc.de/core/01/properties/}visibility"""

    def elementText(element, default=""):
        # text of an optional element; `default` if the element is absent
        if element is None:
            return default
        return element.text

    ret=[]
    for citation in root.findall(objxpath):
        objId = citation.get('objid')

        citationText = elementText(citation.find(citationxpath))

        # Get volume = preprintID
        # <publication:source type="series">
        #   <dc:title>Max-Planck-Institut fuer Wissenschaftsgeschichte : Preprint</dc:title>
        #   <escidoc:volume>437</escidoc:volume>
        volume = ""
        source = citation.find(srcpath)
        if source is not None:
            volume = elementText(source.find(volumepath))

        # Link to the fulltext, taken from
        # <escidocComponents:content xlink:href="..." storage="internal-managed"/>
        # Components whose <prop:visibility> is "private" must not be
        # linked, so the link stays empty for them.
        visText = elementText(citation.find(visibility))
        link = ""
        if visText != "private":
            contentTag = citation.find(linkspath)
            if contentTag is not None:
                link = contentTag.get("{http://www.w3.org/1999/xlink}href")

        # <dcterms:abstract xml:lang="deu">...</dcterms:abstract>
        # <dcterms:abstract xml:lang="eng">...</dcterms:abstract>
        abstractTexts={}
        for abstract in citation.findall(abstractpath):
            lang = abstract.get("{http://www.w3.org/XML/1998/namespace}lang")
            abstractTexts[lang]=abstract.text

        authors=[]
        for author in citation.findall(creatorpath):
            fn = elementText(author.find(familyNamepath))
            gn = elementText(author.find(givenNamepath))
            authors.append((fn,gn))

        title = elementText(citation.find(titlepath))
        issued = elementText(citation.find(issuedpath))

        item = {"id":objId,
                "citation":citationText,
                "volume":volume,
                "link":link,
                "abstracts":abstractTexts,
                "authors":authors,
                "title":title,
                "year":issued}

        ret.append(item)

    logging.debug("getPublicationsFromContext: done in %ss"%(time.time()-startTime))
    return ret
f845502cf73a preprint methoden hinzugefuegt
dwinter
parents: 3
diff changeset
571
f845502cf73a preprint methoden hinzugefuegt
dwinter
parents: 3
diff changeset
572
f845502cf73a preprint methoden hinzugefuegt
dwinter
parents: 3
diff changeset
573
0
373e6610e290 initial
dwinter
parents:
diff changeset
574
373e6610e290 initial
dwinter
parents:
diff changeset
def manage_addZopePubmanConnectorForm(self):
    """Render the add form for a ZopePubmanConnector.

    Loads the page template 'zpt/AddZopePubmanConnector.zpt' through
    zptFile and returns the result of calling it.
    """
    return zptFile(self, 'zpt/AddZopePubmanConnector.zpt')()
373e6610e290 initial
dwinter
parents:
diff changeset
579
373e6610e290 initial
dwinter
parents:
diff changeset
580
373e6610e290 initial
dwinter
parents:
diff changeset
def manage_addZopePubmanConnector(self,id,title,pubmanURL,RESPONSE=None):
    """Create a ZopePubmanConnector and store it in this container.

    id, title and pubmanURL are handed to the ZopePubmanConnector
    constructor; the new object is registered under `id` via
    _setObject.  If a RESPONSE is given, it is redirected to
    'manage_main' afterwards.
    """
    self._setObject(id, ZopePubmanConnector(id,title,pubmanURL))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
17
48c4a6f3b135 minor bug, handliung withdrawn entrie
dwinter
parents: 16
diff changeset
590