50
|
1 class MPIWGRoot_deleted:
|
|
2
|
|
3
|
33
|
def removeStopWords(self,xo):
    """Return str(xo) with all stop words removed.

    The stop word list is read from ``self.stopwords_en.data`` (one word per
    line) on first use and cached on the instance as ``_v_stopWords``.
    The text is split at single blanks; words are compared lower-cased.

    @param xo: object whose string representation is filtered
    @return: the remaining words joined by single blanks
    """
    if not hasattr(self,'_v_stopWords'):
        self._v_stopWords=self.stopwords_en.data.split("\n")

    stopWords=set(self._v_stopWords)

    # Build a new list instead of deleting from the list that is being
    # iterated: deleting in place skipped the word that followed every
    # removed stop word, so consecutive stop words survived.
    kept=[word for word in str(xo).split(" ") if word.lower() not in stopWords]
    return " ".join(kept)
|
|
19
|
|
20
|
|
def getGetNeighbourhood(self,obj, wordStr, length=100,tagging=True):
    """Find the text surroundings of the words of wordStr in the page of obj.

    The page text is looked up in ``self.en.getHarvestCache()`` under the key
    ``obj.absolute_url()``; on a cache miss it is produced with
    ``obj.harvest_page(mode="slim")``. Markup and HTML comments are removed
    before searching. The search is case insensitive. A hit that lies inside
    an already collected surrounding enlarges that surrounding instead of
    creating a new one.

    @param obj: object whose page is searched
    @param wordStr: words separated by blanks; phrases are enclosed in double
                    quotes ("a phrase"); "*" denotes a wildcard and is ignored
    @param length: optional, default 100; up to length characters before and
                   after the start of a hit belong to the surrounding
    @param tagging: optional, default True; if false the hits are not wrapped
                    in <span class="found">..</span>
    @return: list of text snippets, "..." marks a snippet that does not start
             or end at the text boundary; empty list if there is no text
    """
    ret=[] # snippets that are returned
    ranges=[] # tuples (x,y): start and end position of the i-th snippet in the text

    wordStr=wordStr.strip()

    def isInRanges(nr,length):
        """Return the index of the first range that already covers position nr, -1 if there is none.

        @param nr: position to be tested
        @param length: length of the word found at that position
        """
        for index,(start,end) in enumerate(ranges):
            if (start<=nr) and (nr < (end-length)):
                return index
        return -1

    # deal with phrases: inside a phrase blanks are temporarily replaced by "_"
    # so that the phrase survives the split at blanks, the quotes are removed
    def rep_empty(match):
        return match.group(0).replace(" ","_").replace("\"","")

    wordStr=re.sub("\".*?\"", rep_empty,wordStr)

    # deal with wildcards, for our purposes it is enough to delete the wildcard
    wordStr=wordStr.replace("*","")

    # Turn "_" back into " " once, so that searching and highlighting use the
    # same form of a phrase. Empty entries (repeated blanks) are dropped, an
    # empty search word can never produce a meaningful hit.
    words=[word.replace("_"," ") for word in wordStr.split(" ") if word]

    txtCache = self.en.getHarvestCache()
    txt= txtCache.get(obj.absolute_url(),None)

    if txt is None:
        logging.debug("NO CACHE for: "+obj.absolute_url())
        txt=obj.harvest_page(mode="slim")

    if not txt:
        return ret

    soup = BeautifulSoup(txt)

    # remove HTML comments, then keep only the text nodes
    for comment in soup.findAll(text=lambda text:isinstance(text, Comment)):
        comment.extract()

    txt = ''.join(soup.findAll(text=True))

    # NOTE(review): positions found in the lower-cased text are applied to txt;
    # this assumes lower() does not change the length of the text.
    lowered=txt.lower()

    for word in words:
        needle=word.lower()
        pos=lowered.find(needle)

        # find() returns -1 when there is no further hit; position 0 is a
        # valid hit (it was formerly treated as "not found")
        while pos >= 0:
            x=max(0,pos-length)
            y=min(len(txt),pos+length)

            # is the word already inside one of the collected surroundings?
            nr=isInRanges(pos,len(word))
            if nr >=0: # yes: enlarge that surrounding
                x=min(ranges[nr][0],x)
                y=max(ranges[nr][1],y)

            snippet=txt[x:y]
            if x!=0: # add dots if in the middle of text
                snippet="..."+snippet

            if y!=len(txt): # add dots if in the middle of text
                snippet=snippet+"..."

            if nr >=0: # replace the enlarged surrounding
                ranges[nr]=(x,y)
                ret[nr]=snippet
            else: # otherwise add a new surrounding
                ranges.append((x,y))
                ret.append(snippet)

            pos=lowered.find(needle,pos+len(word))

    # now highlight everything
    if tagging:
        for word in words:
            # escape the word: it is literal text, not a regular expression
            repl=re.compile(re.escape(word),re.IGNORECASE)
            tagged=""" <span class="found">%s</span>"""%word.upper()
            # a function as replacement keeps backslashes in the word literal
            ret=[repl.sub(lambda match: tagged,snippet) for snippet in ret]

    return ret
|
|
def copyAllImagesToMargin(self):
    """Move the images of all projects into the margins.

    Calls ``copyImageToMargin()`` on the fourth element of every entry
    returned by ``self.getTree()``. A project that fails is logged together
    with its traceback and does not stop the remaining projects.
    """
    for project in self.getTree():
        proj=project[3]
        try:
            proj.copyImageToMargin()
        except Exception:
            # best effort: continue with the next project, but keep the
            # traceback and let SystemExit/KeyboardInterrupt pass through
            logging.exception("Cannnot do: %s"%repr(project))
|
|
144
|
|
def transformProjectsToId(self):
    """Helper that equips the projects with a list of responsible persons.

    For the fourth element of every entry of ``self.getTree()`` the names in
    the field 'xdata_01' are resolved with ``identifyNames``. A name with
    exactly one match is stored as (name, key of the matched object) in
    ``responsibleScientistsList``, a name without a match as (name, "");
    an ambiguous name is only reported.

    @return: report text, one line per unmatched or ambiguous name
    """
    report=[]
    for entry in self.getTree():
        proj=entry[3]
        identified=proj.identifyNames(proj.getContent('xdata_01'))

        # create the list on first use, keep an existing one
        if not hasattr(proj,'responsibleScientistsList'):
            proj.responsibleScientistsList=[]

        for personName,matches in identified.items():
            hits=len(matches)
            if hits==1: # unique
                key=matches[0].getObject().getKey()
                proj.responsibleScientistsList.append((personName,key))
            elif hits==0: # no entry
                report.append("kein eintrag--- %s: %s\n"%(proj.getId(),personName))
                proj.responsibleScientistsList.append((personName,""))
            else: # ambiguous
                report.append("nicht eindeutig --- %s: %s\n"%(proj.getId(),personName))

    return "".join(report)
|
|
169
|
|
170
|
|
def harvestProjects(self):
    """Write the harvested text of all projects to /tmp/harvest_MPIWG.

    All objects of meta type 'MPIWGProject' found below
    ``self.aq_parent.projects`` are harvested with ``harvest_page()``. Every
    non-empty result is written to a file named after the first element of
    the ZopeFind result, with "/" replaced by "_".
    """
    try:
        os.mkdir("/tmp/harvest_MPIWG")
    except OSError:
        # usually the directory exists already; any other problem shows up
        # when the first file is opened
        pass

    founds=self.ZopeFind(self.aq_parent.projects,obj_metatypes=['MPIWGProject'],search_sub=1)
    for found in founds:
        txt=found[1].harvest_page()

        if txt:
            name=found[0].replace("/","_")
            fh=open("/tmp/harvest_MPIWG/"+name,"w")
            try:
                fh.write(txt)
            finally:
                # close the file even if writing fails
                fh.close()
|
33
|
187
|
|
188
|
|
189
|
|
def generateNameIndex(self):
    """Create an index of the persons that are mentioned in the projects.

    For every name in ``self.fulltext.historicalNames`` the table persons of
    the database 'authorities' is queried for that last name. Each full text
    hit for the name in an object that ``isActual()`` is then inspected: if a
    first name known for the last name is among the two words before or the
    two tokens after the last name, the id of the object is recorded.

    The result is also stored in ``self.nameIndex``.

    @return: dictionary "lastname,firstname" -> list of object ids
    """
    import psycopg
    # NOTE(review): the credentials are hard coded; consider moving them to the configuration
    o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0)
    results={}
    nameItems=self.fulltext.historicalNames.items()
    logging.debug("generateNameIndex: %s"%repr(nameItems))
    for nameItem in nameItems: # walk through all names of the lexicon

        c = o.cursor()
        name=nameItem[0]
        logging.debug("check %s"%name)
        # NOTE(review): the statement is built by string interpolation; what
        # quote() escapes is not visible here -- confirm that it is safe for
        # SQL or switch to bound parameters
        c.execute("select lastname,firstname from persons where lower(lastname) = '%s'"%quote(name))
        tmpres=c.fetchall()
        if not tmpres:
            # Nobody with this last name, so no first name can match. Without
            # this check lastname was undefined for the first name or left
            # over from the previous name.
            continue

        lastname=tmpres[0][0]
        firstnames=[result[1] for result in tmpres] # find all firstnames

        for found in self.fulltext({'names':name}):
            foundObj=found.getObject()
            if not foundObj.isActual():
                continue

            for nh in foundObj.getGetNeighbourhood(name, length=50,tagging=False): # get the surroundings
                # check whether the first name stands before or behind the last name
                # NOTE(review): position is -1 if lastname does not occur in
                # nh in exactly this spelling; the slices below then refer to
                # the end of the snippet
                position=nh.find(lastname)

                # before: the last two words, only if there are at least two
                bevorS=nh[0:position].split()
                if len(bevorS)>1:
                    bevor=bevorS[-2:]
                else:
                    bevor=[]

                # behind: the two tokens following the last name
                behindS= re.split("[,|;| ]",nh[position:])
                if len(behindS)>2:
                    behind=behindS[1:3]
                else:
                    behind=[]

                candidates=bevor+behind
                for firstname in firstnames:
                    if firstname in candidates: # names with nobiliary particles are not found this way yet
                        entryId="%s,%s"%(lastname,firstname)
                        objId=foundObj.getId()
                        occurrences=results.setdefault(entryId,[])
                        if objId not in occurrences:
                            logging.debug("d %s for %s"%(entryId,objId))
                            occurrences.append(objId)

    self.nameIndex=results
    return results
|
|
244
|
|
def editNameIndexHTML(self):
    """Render the form for editing the name index.

    If there is no edited index yet, it is initialised with a copy of the
    automatically generated ``self.nameIndex``.

    @return: the rendered page template 'zpt/editHistoricalNames.zpt'
    """
    if not hasattr(self,'nameIndexEdited'):
        # Deep copy: editNameIndex extends the occurrence lists in place
        # (+=). A shallow copy shares these lists with nameIndex, so an edit
        # would silently change the generated index as well.
        self.nameIndexEdited=copy.deepcopy(self.nameIndex)

    pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','editHistoricalNames.zpt')).__of__(self)
    return pt()
|
|
254
|
|
def getNamesInProject(self,projectId):
    """Get all names of nameIndexEdited that are referenced in a project.

    @param projectId: id of the project
    @return: list of the keys of ``self.nameIndexEdited`` whose entry
             contains projectId
    """
    index=self.nameIndexEdited
    return [name for name in index.keys() if projectId in index[name]]
|
|
264
|
|
def editNameIndex(self,RESPONSE=None,name=None,occurrances=None,submit=None):
    """Apply one edit operation to the edited name index.

    @param RESPONSE: optional; if given, it is redirected to 'editNameIndexHTML'
    @param name: key of the index entry
    @param occurrances: occurrences of the name, one per line
    @param submit: operation to perform
        "change": replace the entry of name by occurrances
        "add": append occurrances to the entry of name, create it if missing
        "delete": remove the entry of name and remember occurrances in
                  ``self.deletedHistoricalNames``
        anything else: the index is left as it is
    """
    # work on a local reference and assign it back below, so that changes to
    # nameIndexEdited become known to the ZODB
    index=self.nameIndexEdited

    if submit=="change":
        index[name]=occurrances.split("\n")

    elif submit=="add":
        if name in index:
            index[name]+=occurrances.split("\n")
        else:
            index[name]=occurrances.split("\n")

    elif submit=="delete":
        deleted=getattr(self,'deletedHistoricalNames',{})

        # a list stored under this attribute is discarded
        if type(deleted) is ListType:
            deleted={}

        if name in deleted:
            deleted[name]+=occurrances.split("\n")
        else:
            deleted[name]=occurrances.split("\n")

        self.deletedHistoricalNames=deleted

        del self.nameIndexEdited[name]

    self.nameIndexEdited=index

    if RESPONSE is not None:
        RESPONSE.redirect('editNameIndexHTML')
|
|
300
|
|
301
|
|
302
|
|
def restoreIndex(self):
    """Reset the edited name index to the automatically generated one.

    @return: the string "done"
    """
    # Store a deep copy instead of the index itself: editNameIndex changes
    # nameIndexEdited in place, with a plain assignment every later edit
    # would also alter nameIndex and nothing could be restored any more.
    self.nameIndexEdited=copy.deepcopy(self.nameIndex)
    return "done"
|
50
|
307
|
|
def getProjectsOfMembers(self,date=None):
    """Give the projects of all members.

    Each entry of ``self.getAllMembers()`` is expected to carry the value to
    report at index 0 and the member key at index 1.

    @param date: passed on to getProjectsOfMember
    @return: list of tuples (member[0], projects) for all members that have
             at least one project
    """
    members=self.getAllMembers()
    logging.debug("X %s"%repr(members))

    ret=[]
    for member in members:
        memberProjects=self.getProjectsOfMember(key=member[1],date=date)
        if len(memberProjects)<=0:
            continue # members without projects are left out
        ret.append((member[0],memberProjects))

    return ret
|
|
321
|
|
def getProjectsOfMember(self,key=None,date=None,onlyArchived=1,onlyActive=1):
    """Get the projects of a member, sorted by WEB_title.

    The projects are taken from ``self.ProjectCatalog`` (query on
    'getPersonKeyList'). Hits whose object is marked invisible or has a
    non-empty archiveTime are always left out.

    @param key: key that identifies the member; without a key an empty list
                is returned
    @param date: (optional) version that was valid at this date; not
                 evaluated by this method
    @param onlyArchived:
        onlyArchived=0: all projects
        onlyArchived=1: only current (not archived) projects
        onlyArchived=2: only archived projects
    @param onlyActive:
        onlyActive=1: only active projects
        onlyActive=2: only projects that are not active
        any other value: all projects
    @return: list of project objects
    """
    # TODO: the whole solution
    ret=[]
    if not key:
        return ret # key has to be defined

    logging.debug("MPIWGROOT (getProjectsOfMember):"+key)
    brains=self.ProjectCatalog({'getPersonKeyList':utf8ify(key)})

    visible=[]
    if brains:
        for brain in brains:
            candidate=brain.getObject()
            if (not getattr(candidate,'invisible',None)) and (getattr(candidate,'archiveTime','')==''):
                visible.append(brain)

    visible.sort(key=lambda brain: brain.WEB_title)

    projectListe=[]
    for brain in visible:
        obj=brain.getObject()

        if onlyArchived==1: # only current projects
            add=not obj.isArchivedProject()
        elif onlyArchived==2: # only archived projects
            add=bool(obj.isArchivedProject())
        else: # all
            add=True

        if onlyActive==1: # only active projects
            add=add and bool(obj.isActiveProject())
        elif onlyActive==2: # only projects that are not active
            # this branch formerly tested onlyArchived and never excluded
            # anything
            add=add and not obj.isActiveProject()

        if add:
            projectListe.append(obj)

    return projectListe
|
|
390
|
|
391
|
|
def givePersonList(self,name):
    """Check whether a person is in the members catalog and return the matching persons.

    The name is split at commas or, if it contains no comma, at blanks. The
    non-empty parts are joined with ' AND ' and passed to
    ``self.MembersCatalog`` as query for 'title'.

    @param name: name of the person, "lastname, firstname" or words separated
                 by blanks
    @return: list of [lastName, firstName] of all hits; empty list if nothing
             is found or name contains nothing to search for
    """
    parts=name.split(",")
    if len(parts)==1:
        parts=name.split(" ")

    # Strip every part and drop all empty ones, so that repeated blanks or a
    # trailing comma cannot produce a broken query like "a AND  AND b".
    terms=[part.strip() for part in parts if part.strip()]
    if not terms:
        # nothing to search for (formerly this ended in an unbound variable)
        return []

    hits=self.MembersCatalog({'title':' AND '.join(terms)})
    if hits:
        return [[hit.lastName,hit.firstName] for hit in hits]
    return []
|
|
411
|
|
412 ## splitted=name.split(",") # version nachname, vorname...
|
|
413 ## if len(splitted)>1:
|
|
414 ## lastName=splitted[0]
|
|
415 ## firstName=splitted[1]
|
|
416 ## else:
|
|
417 ## splitted=name.split(" ") #version vorname irgenwas nachnamae
|
|
418
|
|
419 ## lastName=splitted[len(splitted)-1]
|
|
420 ## firstName=string.join(splitted[0:len(splitted)-1])
|
|
421
|
|
422 ## objs=[]
|
|
423
|
|
424 #print self.members
|
|
425 ## for x in self.members.__dict__:
|
|
426 ## obj=getattr(self.members,x)
|
|
427 ## if hasattr(obj,'lastName') and hasattr(obj,'firstName'):
|
|
428
|
|
429 ## if (re.match(".*"+obj.lastName+".*",lastName) or re.match(".*"+lastName+".*",obj.lastName)) and (re.match(".*"+obj.firstName+".*",firstName) or re.match(".*"+firstName+".*",obj.firstName)):
|
|
430
|
|
431 ## objs.append((obj,lastName+", "+firstName))
|
|
432
|
|
433
|
|
434 ## return objs
|
|
435
|
|
436
|
|
def personCheck(self,names):
    """Look up all persons of a list of names.

    @param names: names separated by ";"
    @return: dictionary name -> result of givePersonList(name); empty names
             and names whose lookup failed are left out
    """
    ret={}
    for name in names.split(";"):
        if name=="":
            continue
        try:
            ret[name]=self.givePersonList(name)
        except Exception:
            # best effort: a name that cannot be looked up must not break the
            # whole list, but the failure should be visible in the log
            logging.exception("personCheck: lookup failed for %s"%repr(name))
    return ret
|
|
451
|
|
def giveCheckList(self,person,fieldname):
    """Return the check list for a field.

    Only the field 'xdata_01' is supported: its content is handed to
    personCheck. For every other field nothing (None) is returned.

    @param person: object that provides getContent(fieldname)
    @param fieldname: name of the field
    """
    if fieldname!='xdata_01':
        return None
    return self.personCheck(person.getContent(fieldname))
|
|
459
|
|
460
|
|
461 # TODO: do we need this here?
|
|
def isCheckField(self,fieldname):
    """Tell whether fieldname is contained in the module level checkFields.

    @param fieldname: name of the field
    @return: result of the membership test
    """
    isListed=fieldname in checkFields
    return isListed
|
|
465
|
|
466
|
|
467
|
|
def sortResults(self,results):
    """Run a catalog search and give the results back grouped by meta_type.

    @param results: callable that returns the search results; it is called
                    exactly once (it used to be called a second time just for
                    logging)
    @return: dictionary meta_type -> list of results in their original order
    """
    found=results()
    logging.debug(found)

    ret = {}
    for result in found:
        ret.setdefault(result.meta_type,[]).append(result)

    logging.debug(ret)
    return ret
|
|
480
|
|
481 # TODO: remove
|
|
def isActiveMember(self,key):
    """Test whether the staff member key is active.

    Searches the table 'personal_www' for a row with this key and
    publish_the_data equal to 'yes'.

    @param key: key of the staff member
    @return: True if getAt yields a true value for the first row of the
             search, otherwise False
    """
    rows=self.ZSQLInlineSearch(_table='personal_www',
                               _op_key='eq',key=utf8ify(key),
                               _op_publish_the_data='eq',
                               publish_the_data='yes')
    ret=getAt(rows, 0)

    logging.info("MPIWGROOT ACTIVE_MEMBER %s"%ret)
    return bool(ret)
|
|
495
|
|
496 # TODO: remove
|
|
def isActual(self,project):
    """Check whether a project is current.

    @param project: the project itself or a catalog hit that provides
                    getObject()
    @return: False if the archiveTime of the project is smaller than the
             current local time, otherwise True; a project without
             archiveTime is current
    """
    now=time.localtime()

    # a hit from a catalog result list has to be resolved first
    if hasattr(project,'getObject'):
        obj=project.getObject()
    else:
        obj=project

    # NOTE(review): archiveTime is compared with a time.struct_time; confirm
    # that it is stored in a comparable form
    return not (getattr(obj,'archiveTime',now)< now)
|
|
510
|