Mercurial > hg > MPIWGWeb
comparison MPIWGRoot.py @ 1:1f2760ed3efe
indices geloescht
author | dwinter |
---|---|
date | Wed, 30 Jan 2013 11:47:21 +0100 |
parents | bca61e893fcc |
children | ddf6c1a27a4b |
comparison
equal
deleted
inserted
replaced
0:bca61e893fcc | 1:1f2760ed3efe |
---|---|
80 #{'label':'update the institutsbibliography','action':'updateInstitutsbiliography'}, | 80 #{'label':'update the institutsbibliography','action':'updateInstitutsbiliography'}, |
81 #{'label':'Edit Historical Persons','action':'editHistoricalPersonsForm'}, | 81 #{'label':'Edit Historical Persons','action':'editHistoricalPersonsForm'}, |
82 #{'label':'Store Historical Persons','action':'storeHistoricalPersons'}, | 82 #{'label':'Store Historical Persons','action':'storeHistoricalPersons'}, |
83 ) | 83 ) |
84 | 84 |
85 | |
86 def getHarvestCachePort(self): | |
87 return getattr(self,"harvestPort",29999) | |
88 | |
89 def getHarvestCacheServer(self): | |
90 return getattr(self,"harvestServer","localhost") | |
91 | 85 |
92 | |
93 def getHarvestCache(self): | |
94 logging.debug("CACHE:"+repr(self._v_harvestCache)) | |
95 if self._v_harvestCache==None: | |
96 #storage = FileStorage.FileStorage('/tmp/'+self.getId()+'test-filestorage.fs') | |
97 addr = self.getHarvestCacheServer(), self.getHarvestCachePort() | |
98 storage = ClientStorage.ClientStorage(addr) | |
99 db = DB(storage) | |
100 self._v_harvestDV=db | |
101 self._v_harvestDV=db | |
102 conn = db.open() | |
103 dbroot = conn.root() | |
104 if not dbroot.has_key('templates'): | |
105 from BTrees.OOBTree import OOBTree | |
106 dbroot['templates'] = OOBTree() | |
107 | |
108 self._v_harvestCache = dbroot['templates'] | |
109 logging.debug("CACHE2:"+repr(self._v_harvestCache)) | |
110 return self._v_harvestCache | |
111 | |
112 | |
113 | |
114 def __del__(self): | |
115 if self._v_harvestCache!=None: | |
116 self._v_harvestDV.close(); | |
117 | 86 |
118 def getGetNeighbourhood(self,obj, wordStr, length=100,tagging=True): | 87 |
119 """finde umgebung um die worte in wordStr, zurueckgegeben wird eine Array mit den Umgebungen von Fundstellen der Worte | |
120 alle Tags werden entfernt, die Fundstellen werden mit <span class="found">XX</span> getaggt, die Umgebungen werden | |
121 case insensitive gesucht | |
122 @param wordStr: string mit Worten getrennt durch Leerzeichen, Phrasen sind mit " gekennzeichnet | |
123 "eine phrase", "*" bezeichnet wildcards und wird ignoriert" | |
124 @param length: optional, default wert 100, 2*length ist die groesse der Umgebung | |
125 @param tagging: optional default wert true, kein span tag wird erzweugt falls tag=false | |
126 """ | |
127 | |
128 ret=[] # nimmt das Array auf, dass spaeter zurueckgegeben wird | |
129 ranges=[] #Array mit tupeln x,y wobei x die Position des Anfang und y des Endes der i-ten Umgebung angiebt | |
130 | |
131 wordStr=wordStr.lstrip().rstrip() | |
132 | |
133 def isInRanges(nr,length): | |
134 """test ob eine gegeben Position nr schon irgendwo in einer Umgebung ist, gibt den Index des ersten Wertes aus ranges zurueck, | |
135 -1, wenn kein Treffer | |
136 | |
137 @param nr: Position die geprueft werden soll | |
138 @param length: Laenge des Wortes das geprueft werden soll | |
139 """ | |
140 for x in ranges: | |
141 if (x[0]<=nr) and (nr < (x[1]-length)): | |
142 return ranges.index(x) | |
143 return -1 | |
144 | |
145 # deal with phrases, in Phrasen werden die Leerzeichen durch "_" ersetzt. | |
146 def rep_empty(str): | |
147 x= re.sub(" ","_",str.group(0)) | |
148 return re.sub("\"","",x) | |
149 | |
150 wordStr=re.sub("\".*?\"", rep_empty,wordStr)#ersetze leerzeichen in " " durch "_" und loesche " | |
151 | |
152 #deal with wildcards, for our purposes it is enough to delete the wildcard | |
153 wordStr=wordStr.replace("*","") | |
154 | |
155 words=wordStr.split(" ") | |
156 #if not words is ListType: | |
157 # words=[words] | |
158 | |
159 | |
160 txtCache = self.en.getHarvestCache(); | |
161 txt= txtCache.get(obj.absolute_url(),None) | |
162 | |
163 if txt==None: | |
164 | |
165 logging.debug("NO CACHE for: "+obj.absolute_url()) | |
166 txt=obj.harvest_page(mode="slim") | |
167 | |
168 | |
169 if not txt: | |
170 return ret | |
171 | |
172 soup = BeautifulSoup(txt) | |
173 | |
174 comments = soup.findAll(text=lambda text:isinstance(text, Comment)) | |
175 [comment.extract() for comment in comments] | |
176 | |
177 txt = ''.join(soup.findAll(text=True)) | |
178 | |
179 | |
180 #txt=re.sub("<.*?>", "", txt) # loesche alle Tags | |
181 for word in words: | |
182 word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " | |
183 pos=0 | |
184 | |
185 n=txt.lower().count(word.lower()) # wie oft tritt das Wort auf | |
186 | |
187 for i in range(n): | |
188 pos=txt.lower().find(word.lower(),pos) | |
189 | |
190 if pos > 0: | |
191 x=max(0,pos-length) | |
192 y=min(len(txt),pos+length) | |
193 | |
194 | |
195 #is word already in one of the results | |
196 nr=isInRanges(pos,len(word)) | |
197 if nr >=0:# word ist in einer schon gefunden Umgebung, dann vergroessere diese | |
198 x=min(ranges[nr][0],x) | |
199 y=max(ranges[nr][1],y) | |
200 | |
201 str=txt[x:y] | |
202 if x!=0: #add dots if in the middle of text | |
203 str="..."+str | |
204 | |
205 if y!=len(txt): #add dots if in the middle of text | |
206 str=str+"..." | |
207 | |
208 | |
209 | |
210 if nr >=0: # word ist in einer schon gefunden Umgebung | |
211 ranges[nr]=(x,y) # neue Position der Umgebung | |
212 | |
213 ret[nr]=str # neue Umgebung | |
214 else: # andernfalls neue Umgebung hinzufuegen | |
215 ranges.append((x,y)) | |
216 | |
217 ret.append(str) | |
218 | |
219 pos=pos+len(word) | |
220 else: | |
221 break; | |
222 | |
223 # now highlight everything | |
224 if tagging: | |
225 for x in range(len(ret)): | |
226 for word in words: | |
227 repl=re.compile(word,re.IGNORECASE) | |
228 ret[x]=repl.sub(""" <span class="found">%s</span>"""%word.upper(),ret[x]) | |
229 | |
230 return ret | |
231 def copyAllImagesToMargin(self): | |
232 """tranformiere alle Bilder in die Margins""" | |
233 projects=self.getTree() | |
234 ret="" | |
235 for project in projects: | |
236 proj=project[3] | |
237 try: | |
238 persons=proj.copyImageToMargin(); | |
239 except: | |
240 logging.error("Cannnot do: %s"%repr(project)) | |
241 | |
242 def transformProjectsToId(self): | |
243 """trnasformiere zu ID, Hilfsfunktion die die alten Templates analysiert und mit der neuen Liste | |
244 verantwortlicher Personen versieht""" | |
245 projects=self.getTree() | |
246 ret="" | |
247 for project in projects: | |
248 | |
249 proj=project[3] | |
250 persons=proj.identifyNames(proj.getContent('xdata_01')) | |
251 if not hasattr(proj,'responsibleScientistsList'): | |
252 proj.responsibleScientistsList=[] | |
253 | |
254 for person in persons.items(): | |
255 | |
256 if len(person[1]) >1: #nicht eindeutig | |
257 ret+="nicht eindeutig --- %s: %s\n"%(proj.getId(),person[0]) | |
258 | |
259 elif len(person[1]) ==0: #kein eintrage | |
260 ret+="kein eintrag--- %s: %s\n"%(proj.getId(),person[0]) | |
261 proj.responsibleScientistsList.append((person[0],"")) | |
262 else: | |
263 proj.responsibleScientistsList.append((person[0],person[1][0].getObject().getKey())) | |
264 | |
265 return ret | |
266 | |
267 | |
268 def harvestProjects(self): | |
269 """harvest""" | |
270 folder="/tmp" | |
271 try: | |
272 os.mkdir("/tmp/harvest_MPIWG") | |
273 except: | |
274 pass | |
275 founds=self.ZopeFind(self.aq_parent.projects,obj_metatypes=['MPIWGProject'],search_sub=1) | |
276 for found in founds: | |
277 txt=found[1].harvest_page() | |
278 | |
279 if txt and (txt != ""): | |
280 name=found[0].replace("/","_") | |
281 fh=file("/tmp/harvest_MPIWG/"+name,"w") | |
282 fh.write(txt) | |
283 fh.close() | |
284 | |
285 def decode(self,str): | 88 def decode(self,str): |
286 """decoder""" | 89 """decoder""" |
287 | 90 |
288 if not str: | 91 if not str: |
289 return "" | 92 return "" |
605 def __init__(self, id, title): | 408 def __init__(self, id, title): |
606 """init""" | 409 """init""" |
607 self.id=id | 410 self.id=id |
608 self.title=title | 411 self.title=title |
609 | 412 |
610 def removeStopWords(self,xo): | 413 |
611 """remove stop words from xo""" | |
612 if not hasattr(self,'_v_stopWords'): | |
613 self._v_stopWords=self.stopwords_en.data.split("\n") | |
614 | |
615 x=str(xo) | |
616 | |
617 strx=x.split(" ") | |
618 | |
619 for tmp in strx: | |
620 | |
621 if tmp.lower() in self._v_stopWords: | |
622 del strx[strx.index(tmp)] | |
623 | |
624 return " ".join(strx) | |
625 | |
626 def urlQuote(self,str): | 414 def urlQuote(self,str): |
627 """quote""" | 415 """quote""" |
628 return urllib.quote(str) | 416 return urllib.quote(str) |
629 | 417 |
630 def urlUnQuote(self,str): | 418 def urlUnQuote(self,str): |
1060 self.MembersCatalog.manage_catalogReindex(self.REQUEST,RESPONSE,self.REQUEST['URL1']) | 848 self.MembersCatalog.manage_catalogReindex(self.REQUEST,RESPONSE,self.REQUEST['URL1']) |
1061 logger("MPIWG Root (reindexCatalog: members)",logging.INFO,"DONE") | 849 logger("MPIWG Root (reindexCatalog: members)",logging.INFO,"DONE") |
1062 except: | 850 except: |
1063 logger("MPIWG Root (reindexCatalog: members)",logging.WARNING," %s %s"%sys.exc_info()[:2]) | 851 logger("MPIWG Root (reindexCatalog: members)",logging.WARNING," %s %s"%sys.exc_info()[:2]) |
1064 | 852 |
1065 try: | 853 |
1066 | 854 # |
1067 self.fulltextProjectsMembers.manage_catalogReindex(self.REQUEST,RESPONSE,self.REQUEST['URL1']) | 855 # try: |
1068 logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.INFO,"DONE") | 856 # |
1069 except: | 857 # self.fulltextProjectsMembers.manage_catalogReindex(self.REQUEST,RESPONSE,self.REQUEST['URL1']) |
1070 logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.WARNING," %s %s"%sys.exc_info()[:2]) | 858 # logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.INFO,"DONE") |
1071 | 859 # except: |
1072 | 860 # logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.WARNING," %s %s"%sys.exc_info()[:2]) |
1073 | 861 # |
1074 | 862 # |
863 # | |
864 # | |
1075 | 865 |
1076 | 866 |
1077 if RESPONSE: | 867 if RESPONSE: |
1078 RESPONSE.redirect('manage_main') | 868 RESPONSE.redirect('manage_main') |
1079 | 869 |