comparison MPIWGRoot_deleted_methods.py @ 33:01b5265264b6

more work on projects.
author casties
date Wed, 24 Apr 2013 20:48:45 +0200
parents 1f2760ed3efe
children e30a4bd074db
comparison
equal deleted inserted replaced
32:6891732a148f 33:01b5265264b6
1 def removeStopWords(self,xo): 1 def removeStopWords(self,xo):
2 """remove stop words from xo""" 2 """remove stop words from xo"""
3 if not hasattr(self,'_v_stopWords'): 3 if not hasattr(self,'_v_stopWords'):
4 self._v_stopWords=self.stopwords_en.data.split("\n") 4 self._v_stopWords=self.stopwords_en.data.split("\n")
5 5
6 x=str(xo) 6 x=str(xo)
180 name=found[0].replace("/","_") 180 name=found[0].replace("/","_")
181 fh=file("/tmp/harvest_MPIWG/"+name,"w") 181 fh=file("/tmp/harvest_MPIWG/"+name,"w")
182 fh.write(txt) 182 fh.write(txt)
183 fh.close() 183 fh.close()
184 184
185
186
def generateNameIndex(self):
    """Build an index of the persons mentioned in the texts.

    For every name in ``self.fulltext.historicalNames`` the ``persons``
    table of the ``authorities`` database is searched for rows with that
    (lower-cased) last name.  Each current object found by the fulltext
    search for the name is then checked: if one of the known first names
    appears within the two words directly before or after the last name,
    the object's id is recorded under the key ``"lastname,firstname"``.

    Names with nobility particles between first and last name are not
    detected by this simple proximity check.

    Returns:
        dict mapping ``"lastname,firstname"`` to a list of object ids.
        The same dict is stored as ``self.nameIndex``.
    """
    import psycopg

    # NOTE(review): database credentials are hard-coded; they should come
    # from configuration instead of the source file.
    o = psycopg.connect('dbname=authorities user=dwinter password=3333', serialize=0)
    results = {}
    try:
        print(self.fulltext.historicalNames.items())
        # walk through all names of the lexicon
        for nameItem in self.fulltext.historicalNames.items():
            c = o.cursor()
            name = nameItem[0]
            print("check %s" % (name,))
            # NOTE(review): the statement is assembled by string formatting
            # and relies on quote() for escaping; confirm that quote() is an
            # SQL-safe escaper or switch to a parameterised query.
            c.execute("select lastname,firstname from persons where lower(lastname) = '%s'" % quote(name))
            tmpres = c.fetchall()
            if not tmpres:
                # No person with this last name: nothing can match, and
                # continuing would use an unbound or stale ``lastname``.
                continue
            lastname = tmpres[0][0]
            firstnames = [row[1] for row in tmpres]  # all known first names

            for found in self.fulltext({'names': name}):
                obj = found.getObject()
                if not obj.isActual():
                    continue
                # fetch the text surrounding each occurrence of the name
                for nh in obj.getGetNeighbourhood(name, length=50, tagging=False):
                    position = nh.find(lastname)
                    if position < 0:
                        # The last name is not literally contained in this
                        # snippet, so "before" and "after" are undefined.
                        continue
                    # up to two words directly before the last name
                    before = nh[:position].split()[-2:]
                    # up to two tokens directly after it; token 0 is the
                    # last name itself
                    after = re.split("[,|;| ]", nh[position:])[1:3]
                    nearby = before + after
                    for firstname in firstnames:
                        if firstname not in nearby:
                            continue
                        key = "%s,%s" % (lastname, firstname)
                        occurrences = results.setdefault(key, [])
                        objId = obj.getId()
                        if objId not in occurrences:
                            print("d %s for %s" % (key, objId))
                            occurrences.append(objId)
    finally:
        # always release the database connection
        o.close()
    self.nameIndex = results
    return results
241
def editNameIndexHTML(self):
    """Render the page for editing the name index.

    If no edited index exists yet, it is initialised from the automatically
    generated ``self.nameIndex``.

    Returns:
        The rendered ``zpt/editHistoricalNames.zpt`` page template.
    """
    if not hasattr(self, 'nameIndexEdited'):
        # Deep copy: a shallow copy would share the occurrence lists with
        # nameIndex, so in-place edits would silently change the generated
        # index as well.
        self.nameIndexEdited = copy.deepcopy(self.nameIndex)
    pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'editHistoricalNames.zpt')).__of__(self)
    return pt()
251
def getNamesInProject(self,projectId):
    """Return all names of nameIndexEdited that are referenced in the project with id projectId."""
    edited = self.nameIndexEdited
    return [entry for entry in edited.keys() if projectId in edited[entry]]
261
def editNameIndex(self,RESPONSE=None,name=None,occurrances=None,submit=None):
    """Apply one edit to the edited name index.

    Args:
        RESPONSE: optional response object; if given, it is redirected to
            'editNameIndexHTML' after the edit.
        name: key of the index entry to edit.
        occurrances: newline-separated ids belonging to the entry.  None is
            treated as "no ids".
        submit: the action to perform:
            "delete" - remove the entry and remember its ids in
                       ``self.deletedHistoricalNames``;
            "change" - replace the ids of the entry;
            "add"    - create the entry or append the ids to it.
            Any other value leaves the index unchanged.
    """
    # Work on a local reference and assign it back at the end so that the
    # change to nameIndexEdited is registered by the ZODB.
    nI = self.nameIndexEdited
    if occurrances is None:
        lines = []
    else:
        lines = occurrances.split("\n")

    if submit == "delete":
        dh = getattr(self, 'deletedHistoricalNames', {})
        if isinstance(dh, list):
            # a list stored under this attribute is discarded in favour of a dict
            dh = {}
        # Build new lists instead of extending in place, so that lists
        # shared with other indexes are never modified.
        dh[name] = dh.get(name, []) + lines
        self.deletedHistoricalNames = dh
        # deleting a name that is not in the index is not an error
        if name in nI:
            del nI[name]
    elif submit == "change":
        nI[name] = lines
    elif submit == "add":
        nI[name] = nI.get(name, []) + lines

    self.nameIndexEdited = nI

    if RESPONSE is not None:
        RESPONSE.redirect('editNameIndexHTML')
297
298
299
def restoreIndex(self):
    """Reset the edited name index to the automatically generated one.

    The generated index is deep-copied, so later edits to
    ``nameIndexEdited`` cannot alter ``nameIndex``.

    Returns:
        The string "done".
    """
    import copy

    self.nameIndexEdited = copy.deepcopy(self.nameIndex)
    return "done"
304