--- MPIWGWeb/MPIWGProjects.py 2006/01/30 14:33:43 1.47.2.48
+++ MPIWGWeb/MPIWGProjects.py 2006/10/19 07:32:51 1.47.2.61
@@ -47,6 +47,30 @@ def getTextFromNode(nodename):
rc = rc + node.data
return rc
+
+def sortStopWordsF(self,xo,yo):
+ if not hasattr(self,'_v_stopWords'):
+ self._v_stopWords=self.stopwords_en.data.split("\n")
+
+ x=str(xo[1])
+ y=str(yo[1])
+
+ strx=x.split(" ")
+ stry=y.split(" ")
+
+ for tmp in strx:
+ if tmp.lower() in self._v_stopWords:
+ del strx[strx.index(tmp)]
+
+ for tmp in stry:
+ if tmp.lower() in self._v_stopWords:
+ del stry[stry.index(tmp)]
+
+ return cmp(" ".join(strx)," ".join(stry))
+
+def sortStopWords(self):
+ return lambda x,y : sortStopWordsF(self,x,y)
+
def sortF(x,y):
try:
return cmp(x[1],y[1])
@@ -270,6 +294,36 @@ class MPIWGRoot(ZSQLExtendFolder):
folders=['MPIWGProject','Folder','ECHO_Navigation']
meta_type='MPIWGRoot'
+ def harvestProjects(self):
+ """harvest"""
+ folder="/tmp"
+ try:
+ os.mkdir("/tmp/harvest_MPIWG")
+ except:
+ pass
+ founds=self.ZopeFind(self.aq_parent.projects,obj_metatypes=['MPIWGProject'],search_sub=1)
+ for found in founds:
+ txt=found[1].harvest_page()
+
+ if txt and (txt != ""):
+ name=found[0].replace("/","_")
+ fh=file("/tmp/harvest_MPIWG/"+name,"w")
+ fh.write(txt)
+ fh.close()
+
+ def decode(self,str):
+ """decoder"""
+ if not str:
+ return ""
+ if type(str) is StringType:
+ try:
+ return str.decode('utf-8')
+ except:
+ return str.decode('latin-1')
+ else:
+
+ return str
+
def versionHeaderEN(self):
"""version header text"""
@@ -340,7 +394,23 @@ class MPIWGRoot(ZSQLExtendFolder):
return True
else:
return False
-
+
+ def subNavStatic(self,obj):
+ """subnav" von self"""
+ def sortWeight(x,y):
+ x1=int(getattr(x[1],'weight','0'))
+ y1=int(getattr(y[1],'weight','0'))
+ return cmp(x1,y1)
+
+ subs=self.ZopeFind(obj,obj_metatypes=['MPIWGTemplate','MPIWGLink'])
+ subret=[]
+
+ for x in subs:
+ if not(x[1].title==""):
+ subret.append(x)
+ subret.sort(sortWeight)
+ return subret
+
def subNav(self,obj):
"""return subnav elemente"""
def sortWeight(x,y):
@@ -355,12 +425,14 @@ class MPIWGRoot(ZSQLExtendFolder):
#suche die zweite ebene
+
if not obj.aq_parent.getId() in ['de','en']:
obj=obj.aq_parent
-
+
while not self.ZopeFind(self,obj_ids=[obj.getId()]):
obj=obj.aq_parent
+
if hasattr(self,obj.getId()):
subs=self.ZopeFind(getattr(self,obj.getId()),obj_metatypes=['MPIWGTemplate','MPIWGLink'])
@@ -489,6 +561,22 @@ class MPIWGRoot(ZSQLExtendFolder):
self.id=id
self.title=title
+ def removeStopWords(self,xo):
+ """remove stop words from xo"""
+ if not hasattr(self,'_v_stopWords'):
+ self._v_stopWords=self.stopwords_en.data.split("\n")
+
+ x=str(xo)
+
+ strx=x.split(" ")
+
+ for tmp in strx:
+
+ if tmp.lower() in self._v_stopWords:
+ del strx[strx.index(tmp)]
+
+ return " ".join(strx)
+
def urlQuote(self,str):
"""quote"""
return urllib.quote(str)
@@ -634,7 +722,7 @@ class MPIWGRoot(ZSQLExtendFolder):
"""test"""
return self.getProjectsByFieldContent('xdata_09',['biology'])[0].absolute_url
- def getContexts(self,childs=None,parents=None,depth=None,date=None):
+ def getContexts(self,childs=None,parents=None,depth=None,date=None,onlyActive=True):
"""childs alle childs, alle parents"""
ret=[]
@@ -649,15 +737,17 @@ class MPIWGRoot(ZSQLExtendFolder):
if childs:
for project in self.getProjectFields('xdata_05',sort='int',date=date):
searchStr=childs+"(\..*)"
- if re.match(searchStr,project[1]):
-
- if depth:
-
- if int(depth)>=len(project[1].split("."))-len(childs.split(".")):
-
+
+ if (onlyActive and project[0].isActiveProject()) or (not onlyActive):
+ if re.match(searchStr,project[1]):
+
+ if depth:
+
+ if int(depth)>=len(project[1].split("."))-len(childs.split(".")):
+
+ ret.append(project)
+ else:
ret.append(project)
- else:
- ret.append(project)
return ret
def getProjectFields(self,fieldName,date=None,folder=None,sort=None):
@@ -685,6 +775,10 @@ class MPIWGRoot(ZSQLExtendFolder):
if sort=="int":
ret.sort(sortI)
+ elif sort=="stopWords":
+
+ ret.sort(sortStopWords(self))
+
else:
ret.sort(sortF)
@@ -765,10 +859,14 @@ class MPIWGRoot(ZSQLExtendFolder):
RESPONSE.setHeader('Content-type', 'text/html')
RESPONSE.write("
\n")
+ RESPONSE.write("Update Institutsbibliography\n")
+ ret=self.upDateSQL('personalwww.xml')
+ RESPONSE.write("done Insitutsbibliography:%s\n"%ret)
url="http://itgroup.mpiwg-berlin.mpg.de:8050/FMPro?-db=personal-www&-format=-dso_xml&-lay=sql_export&-max=20000&-findall"
dom = NonvalidatingReader.parseUri(url)
#fh=urllib.urlopen(url)
#dom=xml.dom.minidom.parse(fh)
+
RESPONSE.write("got_xml_File\n")
@@ -852,7 +950,7 @@ class MPIWGRoot(ZSQLExtendFolder):
done= obj.updateDBEntry(DBid=id,publish_the_data=publish_the_data,
date_from=date_from,
- date_to=date_to,stay_at_mpiwg=stay_at_mpiwg,position=position)
+ date_to=date_to,stay_at_mpiwg=stay_at_mpiwg,position=position,abteilung=abteilung)
if not done and (publish_the_data=='yes'):
ret=obj.createNewDBEntry(publish_the_data,id,name,
@@ -957,11 +1055,12 @@ class MPIWGRoot(ZSQLExtendFolder):
results=self.MembersCatalog({'isPublished':True})
- ret=[", ".join([proj.firstName, proj.lastName]) for proj in results]
+ ret=[", ".join([proj.lastName, proj.firstName]).decode('utf-8') for proj in results]
+ ret.sort()
return ret
def printAllMembers(self):
- """prin"""
+ """print"""
members=self.getAllMembers()
ret=""
for x in members:
@@ -977,7 +1076,7 @@ class MPIWGRoot(ZSQLExtendFolder):
return entry
- def getTree(self,date=None):
+ def getTree(self,dep=None,date=None,onlyActive=None):
"""generate Tree from project list"""
returnList=[]
@@ -991,12 +1090,18 @@ class MPIWGRoot(ZSQLExtendFolder):
#title=project[0].WEB_title
title=[project[0].getContent('WEB_title')]
#print title
- if idNr[0]!="x":
- returnList.append((depth,nr,title,project[0]))
-
+
+ if idNr[0]=="x": # kompatibilitŠt mit alter Konvention, x vor der Nummer macht project inactive
+ project[0].setActiveFlag(False)
+
+ if (not dep) or (idNr[0]==dep): #falls dep gesetzt ist nur dieses hinzufŸgen.
+ if onlyActive and project[0].isActiveProject(): #nur active projekte
+ returnList.append((depth,nr,title,project[0]))
+ elif not onlyActive:
+ returnList.append((depth,nr,title,project[0]))
return returnList
- def formatElementsAsList(self,elements):
+ def formatElementsAsList(self,elements,onlyOneDept=False):
"""formatiere tree als liste"""
actualDepth=0
@@ -1040,9 +1145,10 @@ class MPIWGRoot(ZSQLExtendFolder):
ret+="""\n"""
if actualDepth==1:
- departmentName={'1':'Department I','2':'Department II','3':'Department III', '4':'Ind. Research Group','5':'Ind. Research Group','6':'Research Network'}
+ departmentName={'1':'Department I','2':'Department II','3':'Department III', '4':'Ind. Research Group','5':'Ind. Research Group','6':'Ind. Research Group','7':'Research Network'}
department=element[3].getContent('xdata_05')
- ret+="""
%s: """%(department,departmentName[department])
+ if not onlyOneDept:
+ ret+="""
%s: """%(department,departmentName[department])
if self.REQUEST.has_key('date'):
ret+="""%s"""%(self.generateUrlProject(element[3].absolute_url())+"/index.html",self.REQUEST['date'],element[3].getContent('WEB_title'))
@@ -1110,11 +1216,21 @@ class MPIWGRoot(ZSQLExtendFolder):
for idNr in form.keys():
- if not (fields[int(idNr)][3].xdata_05==form[idNr]):
- fields[int(idNr)][3].xdata_05=form[idNr]
- fields[int(idNr)][3].copyObjectToArchive()
-
-
+
+ splitted=idNr.split('_')
+
+ if len(splitted)==1: # name des Feldes = idNr, dann Aendere id_nr
+ if not (fields[int(idNr)][3].xdata_05==form[idNr]):
+ fields[int(idNr)][3].xdata_05=form[idNr]
+ fields[int(idNr)][3].copyObjectToArchive()
+
+ if form.has_key(idNr+'_active'): # active flag is set
+
+ fields[int(splitted[0])][3].setActiveFlag(True)
+ else:
+
+ fields[int(splitted[0])][3].setActiveFlag(False)
+
if RESPONSE is not None:
RESPONSE.redirect('showTree')
@@ -1267,6 +1383,7 @@ class MPIWGRoot(ZSQLExtendFolder):
if splittedNew[0]=='':
del splittedNew[0]
search=string.join(splittedNew,' AND ')
+
if not search=='':
proj=self.MembersCatalog({'title':search})
@@ -1329,7 +1446,62 @@ class MPIWGRoot(ZSQLExtendFolder):
return (fieldname in checkFields)
-
+
+ def generateNameIndex(self):
+ """erzeuge einen index verwendeter personen"""
+ import psycopg
+ o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0)
+ results={}
+ print self.fulltext.historicalNames.items()
+ for nameItem in self.fulltext.historicalNames.items(): #gehe durch alle namen des lexikons
+
+ c = o.cursor()
+ name=nameItem[0]
+ print "check",name
+ c.execute("select lastname,firstname from persons where lower(lastname) = '%s'"%quote(name))
+ tmpres=c.fetchall()
+ firstnames=[result[1] for result in tmpres] # find all firstnames
+ if tmpres:
+ lastname=tmpres[0][0]
+
+ for found in self.fulltext({'names':name}):
+ if found.getObject().isActual():
+ for nh in found.getObject().getGetNeighbourhood(name, length=50,tagging=False): #hole umgebung
+ #schaue nun ob der vorname hinter oder vor dem name ist
+ position=nh.find(lastname)
+ # vorher
+ #print "NH",nh
+ bevorS=nh[0:position].split()
+ #print "BV",bevorS
+ if len(bevorS)>1:
+ try:
+ bevor=[bevorS[-1],bevorS[-2]]
+ except:
+ bevor=[bevorS[0]]
+ else:
+ bevor=[]
+ #nachher
+ behindS= re.split("[,|;| ]",nh[position:])
+ #print "BH",behindS
+ if len(behindS)>2:
+ try:
+ behind=behindS[1:3]
+ except:
+ behind=[bevorS[1]]
+ else:
+ behind=[]
+ for firstname in firstnames:
+ if firstname in bevor+behind: #Namen wie mit Adelspraedikaten werden so erstmal nich gefunden
+ id="%s,%s"%(lastname,firstname)
+ if not results.has_key(id):
+ results[id]=[]
+ objId=found.getObject().getId()
+ if not (objId in results[id]):
+ print "added %s for %s"%(id,objId)
+ results[id].append(objId)
+ self.nameIndex=results
+ return results
+
def manage_addMPIWGRootForm(self):
"""form for adding the root"""
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addMPIWGRootForm.zpt')).__of__(self)
@@ -1397,6 +1569,7 @@ class MPIWGProject(CatalogAware,Folder):
meta_type='MPIWGProject'
default_catalog='ProjectCatalog'
+
def sortedByPlace(self,metatype):
"""find metatype and sort by place"""
def sort(x,y):
@@ -1614,6 +1787,17 @@ class MPIWGProject(CatalogAware,Folder):
if RESPONSE:
RESPONSE.redirect('manageImages')
+
+ def hasChildren(self,date=None,onlyActive=True):
+ """check if project has children"""
+ ct=self.getContexts(childs=self.getContent('xdata_05'),
+ depth=1,date=date,onlyActive=onlyActive)
+
+ if ct and len(ct)>0:
+ return True
+ else:
+ return False
+
def addImage(self,fileHd,caption,RESPONSE=None,filename=None):
"""add an MPIWG_Project_image"""
@@ -1771,6 +1955,8 @@ class MPIWGProject(CatalogAware,Folder):
self.creationTime=time.strftime("%Y%m%d%H%M%S",time.localtime())[0:]
self.id=id
self.title=id
+ self.isActiveFlag=True #Flag is true is the project is still active, False if accomplished
+
if argv:
for arg in definedFields:
try:
@@ -1791,6 +1977,14 @@ class MPIWGProject(CatalogAware,Folder):
)
+ def isActiveProject(self):
+ """check if the project is still active, default is true, set to false is the project is accomplished"""
+ return getattr(self,'isActiveFlag',True)
+
+ def setActiveFlag(self,status=True):
+ """set the active flag"""
+ self.isActiveFlag=status
+
def checkDate(self,date):
"""teste ob zum Zeitpunkt date eine andere version existierte"""
@@ -1831,7 +2025,109 @@ class MPIWGProject(CatalogAware,Folder):
"""warnung: project noch nicht existent"""
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','no_project')).__of__(self)
return pt()
-
+
+ def getGetNeighbourhood(self,wordStr, length=100,tagging=True):
+ """finde umgebung um die worte in wordStr, zurŸckgegeben wird eine Array mit den Umgebungen von Fundstellen der Worte
+ alle Tags werden entfernt, die Fundstellen werden mit XX getaggt, die Umgebungen werden
+ case insensitive gesucht
+ @param wordStr: string mit Worten getrennt durch Leerzeichen, Phrasen sind mit " gekennzeichnet
+ "eine phrase", "*" bezeichnet wildcards und wird ignoriert"
+ @param length: optional, default wert 100, 2*length ist die grš§e der Umgebung
+ @param tagging: optional default wert true, kein span tag wird erzweugt falls tag=false
+ """
+
+ ret=[] # nimmt das Array auf, dass spŠter zurŸckgegeben wird
+ ranges=[] #Array mit tupeln x,y wobei x die Position des Anfang und y des Endes der i-ten Umgebung angiebt
+
+ def isInRanges(nr,length):
+ """test ob eine gegeben Position nr schon irgendwo in einer Umgebung ist, gibt den Index des ersten Wertes aus ranges zurŸck,
+ -1, wenn kein Treffer
+
+ @param nr: Position die geprŸft werden soll
+ @param length: LŠnge des Wortes das geprŸft werden soll
+ """
+ for x in ranges:
+ if (x[0]<=nr) and (nr < (x[1]-length)):
+ return ranges.index(x)
+ return -1
+
+ # deal with phrases, in Phrasen werden die Leerzeichen durch "_" ersetzt.
+ def rep_empty(str):
+ x= re.sub(" ","_",str.group(0))
+ return re.sub("\"","",x)
+
+ wordStr=re.sub("\".*?\"", rep_empty,wordStr)#ersetze leerzeichen in " " durch "_" und loesche "
+
+ #deal with wildcards, for our purposes it is enough to delete the wildcard
+ wordStr=wordStr.replace("*","")
+
+ words=wordStr.split(" ")
+ #if not words is ListType:
+ # words=[words]
+
+ txt=self.harvest_page()
+ if not txt:
+ return ret
+ txt=re.sub("<.*?>", "", txt) # loesche alle Tags
+ for word in words:
+ word=re.sub("_"," ",word) # ersetze zurueck "_" durch " "
+ pos=0
+
+ n=txt.lower().count(word.lower()) # wie oft tritt das Wort auf
+
+ for i in range(n):
+ pos=txt.lower().find(word.lower(),pos)
+
+ if pos > 0:
+ x=max(0,pos-length)
+ y=min(len(txt),pos+length)
+
+
+ #is word already in one of the results
+ nr=isInRanges(pos,len(word))
+ if nr >=0:# word ist in einer schon gefunden Umgebung, dann vergrš§ere diese
+ x=min(ranges[nr][0],x)
+ y=max(ranges[nr][1],y)
+
+ str=txt[x:y]
+
+ if nr >=0: # word ist in einer schon gefunden Umgebung
+ ranges[nr]=(x,y) # neue Position der Umgebung
+
+ ret[nr]=str # neue Umgebung
+ else: # andernfalls neue Umgebung hinzufŸgen
+ ranges.append((x,y))
+
+ ret.append(str)
+
+ pos=pos+len(word)
+ else:
+ break;
+
+ # now highlight everything
+ if tagging:
+ for x in range(len(ret)):
+ for word in words:
+ repl=re.compile(word,re.IGNORECASE)
+ ret[x]=repl.sub(""" %s"""%word.upper(),ret[x])
+
+ return ret
+
+ def harvest_page(self,context=None):
+ """Return the page that is harvested for the project search.
+
+ @param context: object the page template is wrapped with via __of__; defaults to self
+ @return: result of calling the harvest_main attribute's object if the project has one, otherwise the rendered 'zpt/harvest_main' page template
+ """
+ # NOTE(review): the indentation is lost in this view, so it cannot be seen
+ # whether the template rendering below belongs to the isActiveProject()/isActual()
+ # branch; presumably it does and other projects yield None -- confirm.
+ if not context:
+ context=self
+
+ if self.isActiveProject() and self.isActual():
+ # a harvest_main attribute overrides the packaged template; it is looked up again by its id and called
+ ext=getattr(self,"harvest_main",None)
+ if ext:
+ return getattr(self,ext.getId())()
+
+ # fall back to the template 'zpt/harvest_main' shipped in the package directory
+ pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','harvest_main')).__of__(context)
+
+
+ return pt()
+
def index_html(self,request=True,context=None):
"""show homepage"""
if not context:
@@ -2060,6 +2356,10 @@ class MPIWGProject(CatalogAware,Folder):
setattr(self,x,[self.REQUEST[x].decode('utf-8')])
+ if self.REQUEST.has_key('active'):
+ self.setActiveFlag(True)
+ else:
+ self.setActiveFlag(False)
if fromEdit and (RESPONSE is not None):
RESPONSE.redirect('./editMPIWGBasisEditor')
@@ -2196,4 +2496,4 @@ def manage_addMPIWGProject(self,id,RESPO
if RESPONSE is not None:
- RESPONSE.redirect('manage_main')
+ RESPONSE.redirect('manage_main')
\ No newline at end of file