--- MPIWGWeb/MPIWGProjects.py 2006/01/16 13:46:21 1.47.2.46 +++ MPIWGWeb/MPIWGProjects.py 2006/11/08 16:36:38 1.47.2.65 @@ -2,7 +2,7 @@ for organizing and maintaining the different project pages """ -#TODO: mechanismus fur links to personen ueberarbeiten, da jetzt alle e_mails als members auftauchen unabhaengig vom status publish_the_data +#TODO: mechanismus fur links to personen ueberarbeiten, da jetzt alle e_mails als members auftauchen unabhaenig vom status publish_the_data #TODO: was passiert wenn aenderungen von jochen im filemaker nicht mit den aenderungen im sql uebereinstimmen #TODO: in einzelnen projecte steht als pfad auf die bilder noch wwwneu statt www @@ -33,6 +33,7 @@ from Ft.Xml.XPath import Evaluate from Ft.Xml.XPath.Context import Context from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print from Ft.Xml import EMPTY_NAMESPACE +import copy definedFields=['WEB_title','xdata_01','xdata_02','xdata_03','xdata_04','xdata_05','xdata_06','xdata_07','xdata_08','xdata_09','xdata_10','xdata_11','xdata_12','xdata_13','WEB_project_header','WEB_project_description','WEB_related_pub'] @@ -47,6 +48,30 @@ def getTextFromNode(nodename): rc = rc + node.data return rc + +def sortStopWordsF(self,xo,yo): + if not hasattr(self,'_v_stopWords'): + self._v_stopWords=self.stopwords_en.data.split("\n") + + x=str(xo[1]) + y=str(yo[1]) + + strx=x.split(" ") + stry=y.split(" ") + + for tmp in strx: + if tmp.lower() in self._v_stopWords: + del strx[strx.index(tmp)] + + for tmp in stry: + if tmp.lower() in self._v_stopWords: + del stry[stry.index(tmp)] + + return cmp(" ".join(strx)," ".join(stry)) + +def sortStopWords(self): + return lambda x,y : sortStopWordsF(self,x,y) + def sortF(x,y): try: return cmp(x[1],y[1]) @@ -123,9 +148,9 @@ class MPIWGLink(SimpleItem): else: return self.getObj().weight - manage_options=SimpleItem.manage_options+( + manage_options=( {'label':'main config','action':'changeLinkForm'}, - ) + )+SimpleItem.manage_options def changeLinkForm(self): @@ -257,7 +282,7 @@ def manage_addMPIWGTemplate(self, MPIWGT class MPIWGRoot(ZSQLExtendFolder): - """Stammordner für den Web-Server""" + """Stammordner fuer den Web-Server""" fieldLabels={'WEB_title':'WEB_Title','xdata_01':'Responsible Scientists','xdata_02':'Department', 'xdata_03':'Historical Persons','xdata_04':'Time period', @@ -270,6 +295,65 @@ class MPIWGRoot(ZSQLExtendFolder): folders=['MPIWGProject','Folder','ECHO_Navigation'] meta_type='MPIWGRoot' + def harvestProjects(self): + """harvest""" + folder="/tmp" + try: + os.mkdir("/tmp/harvest_MPIWG") + except: + pass + founds=self.ZopeFind(self.aq_parent.projects,obj_metatypes=['MPIWGProject'],search_sub=1) + for found in founds: + txt=found[1].harvest_page() + + if txt and (txt != ""): + name=found[0].replace("/","_") + fh=file("/tmp/harvest_MPIWG/"+name,"w") + fh.write(txt) + fh.close() + + def decode(self,str): + """decoder""" + if not str: + return "" + if type(str) is StringType: + try: + return str.decode('utf-8') + except: + return str.decode('latin-1') + else: + + return str + + + def browserCheck(self): + """check the browsers request to find out the browser type""" + bt = {} + ua = self.REQUEST.get_header("HTTP_USER_AGENT") + bt['ua'] = ua + bt['isIE'] = False + bt['isN4'] = False + if string.find(ua, 'MSIE') > -1: + bt['isIE'] = True + else: + bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) + + try: + nav = ua[string.find(ua, '('):] + ie = string.split(nav, "; ")[1] + if string.find(ie, "MSIE") > -1: + bt['versIE'] = string.split(ie, " ")[1] + except: pass + + bt['isMac'] = string.find(ua, 'Macintosh') > -1 + bt['isWin'] = string.find(ua, 'Windows') > -1 + bt['isIEWin'] = bt['isIE'] and bt['isWin'] + bt['isIEMac'] = bt['isIE'] and bt['isMac'] + bt['staticHTML'] = False + + return bt + + def versionHeaderEN(self): """version header text""" @@ -340,7 +424,23 @@ class MPIWGRoot(ZSQLExtendFolder): return True else: return False - + + def subNavStatic(self,obj): + """subnav" von self""" + def sortWeight(x,y): + x1=int(getattr(x[1],'weight','0')) + y1=int(getattr(y[1],'weight','0')) + return cmp(x1,y1) + + subs=self.ZopeFind(obj,obj_metatypes=['MPIWGTemplate','MPIWGLink']) + subret=[] + + for x in subs: + if not(x[1].title==""): + subret.append(x) + subret.sort(sortWeight) + return subret + def subNav(self,obj): """return subnav elemente""" def sortWeight(x,y): @@ -355,12 +455,14 @@ class MPIWGRoot(ZSQLExtendFolder): #suche die zweite ebene + if not obj.aq_parent.getId() in ['de','en']: obj=obj.aq_parent - + while not self.ZopeFind(self,obj_ids=[obj.getId()]): obj=obj.aq_parent + if hasattr(self,obj.getId()): subs=self.ZopeFind(getattr(self,obj.getId()),obj_metatypes=['MPIWGTemplate','MPIWGLink']) @@ -381,6 +483,30 @@ class MPIWGRoot(ZSQLExtendFolder): return True return False + def getSection(self): + """returns the current section name""" + root = self.absolute_url() + url = self.REQUEST['URL'] + path = string.replace(url, root, '') + paths = path.split('/') + if len(paths) > 0: + sec = paths[1] + if sec.find('.') < 0: + return sec + else: + return None + return None + + def getSectionStyle(self, name, style=""): + """returns a string with the given style + '-sel' if the current section == name""" + if self.getSection() == name: + return style + '-sel' + else: + return style + + def MPIWGrootURL(self): + """returns the URL to the root""" + return self.absolute_url() def upDateSQL(self,fileName): """updates SQL databases using fm.jar""" @@ -489,6 +615,22 @@ class MPIWGRoot(ZSQLExtendFolder): self.id=id self.title=title + def removeStopWords(self,xo): + """remove stop words from xo""" + if not hasattr(self,'_v_stopWords'): + self._v_stopWords=self.stopwords_en.data.split("\n") + + x=str(xo) + + strx=x.split(" ") + + for tmp in strx: + + if tmp.lower() in self._v_stopWords: + del strx[strx.index(tmp)] + + return " ".join(strx) + def urlQuote(self,str): """quote""" return urllib.quote(str) @@ -497,89 +639,10 @@ class MPIWGRoot(ZSQLExtendFolder): """quote""" return urllib.unquote(str) - def harvestHistoricalPersons(self): - """erstelle liste aller erwaehnten actors""" - - def normalize(str): - """loesche fuhrendes space""" - if (len(str)>1) and (str[0]==" "): - ret=str[1:] - else: - ret=str - return ret - list={} - projects=self.ZopeFind(self.projects,obj_metatypes=['MPIWGProject']) - - for project in projects: - lg=len(project[1].xdata_03[0])-1 - - if (lg>1) and (project[1].xdata_03[0][lg]==";"): - project[1].xdata_03[0]=project[1].xdata_03[0][0:lg] - - - - - try: - if len(project[1].xdata_03[0].split(";"))>1: # guess if separator is ; - for person in project[1].xdata_03[0].split(";"): - personNormal=normalize(person) - if personNormal in list.keys(): - list[urllib.quote(personNormal)][1].append(project[1]) - else: - list[urllib.quote(personNormal)]=(personNormal,[project[1]]) - else: #guess , is sepeator - for person in project[1].xdata_03[0].split(","): - personNormal=normalize(person) - if urllib.quote(personNormal) in list.keys(): - list[urllib.quote(personNormal)][1].append(project[1]) - else: - list[urllib.quote(personNormal)]=(personNormal,[project[1]]) - - except: - zLOG.LOG("MPIWG Web (harvestHistoricalPerson)",zLOG.ERROR,"cannot analyize: %s"%repr(project)) - - - return list - - def storeHistoricalPersons(self,RESPONSE=None): - """store persons""" - self.personDict={} - personDict=self.harvestHistoricalPersons() - for person in personDict.keys(): - for project in personDict[person][1]: - if person in self.personDict.keys(): - self.personDict[person][1].append((project.absolute_url(),project.WEB_title[0],project.xdata_01[0])) - else: - self.personDict[person]=(personDict[person][0],[(project.absolute_url(),project.WEB_title[0],project.xdata_01[0])]) - - if RESPONSE is not None: - RESPONSE.redirect("showHistoricalPersons") - - - def getPersonDict(self,name): - """name von dict""" - - try: - return self.personDict[name][0].encode('utf-8') - except: - return self.personDict[name][0] - return self.personDict[name][0].decode('latin-1').encode('utf-8') - - - def showHistoricalPersons(self): - """show persons""" - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','showHistoricalPersons')).__of__(self) - return pt() - - - def editHistoricalPersonsForm(self): - """edit historical persons for consistency""" - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','editHistoricalPersonsForm')).__of__(self) - return pt() def getProjectsByFieldContent(self,fieldName,fieldContentsEntry, date=None): - """gib alle Projekte aus mit Value von field mit fieldName enthält ein Element der Liste fieldContents""" + """gib alle Projekte aus mit Value von field mit fieldName enthaelt ein Element der Liste fieldContents""" def sort(x,y): return cmp(x.WEB_title[0],y.WEB_title[0]) @@ -634,7 +697,7 @@ class MPIWGRoot(ZSQLExtendFolder): """test""" return self.getProjectsByFieldContent('xdata_09',['biology'])[0].absolute_url - def getContexts(self,childs=None,parents=None,depth=None,date=None): + def getContexts(self,childs=None,parents=None,depth=None,date=None,onlyActive=True): """childs alle childs, alle parents""" ret=[] @@ -649,15 +712,17 @@ class MPIWGRoot(ZSQLExtendFolder): if childs: for project in self.getProjectFields('xdata_05',sort='int',date=date): searchStr=childs+"(\..*)" - if re.match(searchStr,project[1]): - - if depth: - - if int(depth)>=len(project[1].split("."))-len(childs.split(".")): - + + if (onlyActive and project[0].isActiveProject()) or (not onlyActive): + if re.match(searchStr,project[1]): + + if depth: + + if int(depth)>=len(project[1].split("."))-len(childs.split(".")): + + ret.append(project) + else: ret.append(project) - else: - ret.append(project) return ret def getProjectFields(self,fieldName,date=None,folder=None,sort=None): @@ -685,6 +750,10 @@ class MPIWGRoot(ZSQLExtendFolder): if sort=="int": ret.sort(sortI) + elif sort=="stopWords": + + ret.sort(sortStopWords(self)) + else: ret.sort(sortF) @@ -765,10 +834,14 @@ class MPIWGRoot(ZSQLExtendFolder): RESPONSE.setHeader('Content-type', 'text/html') RESPONSE.write("
\n") + RESPONSE.write("Update Institutsbibliography\n") + ret=self.upDateSQL('personalwww.xml') + RESPONSE.write("done Insitutsbibliography:%s\n"%ret) url="http://itgroup.mpiwg-berlin.mpg.de:8050/FMPro?-db=personal-www&-format=-dso_xml&-lay=sql_export&-max=20000&-findall" dom = NonvalidatingReader.parseUri(url) #fh=urllib.urlopen(url) #dom=xml.dom.minidom.parse(fh) + RESPONSE.write("got_xml_File\n") @@ -852,7 +925,7 @@ class MPIWGRoot(ZSQLExtendFolder): done= obj.updateDBEntry(DBid=id,publish_the_data=publish_the_data, date_from=date_from, - date_to=date_to,stay_at_mpiwg=stay_at_mpiwg,position=position) + date_to=date_to,stay_at_mpiwg=stay_at_mpiwg,position=position,abteilung=abteilung) if not done and (publish_the_data=='yes'): ret=obj.createNewDBEntry(publish_the_data,id,name, @@ -888,6 +961,7 @@ class MPIWGRoot(ZSQLExtendFolder): + if RESPONSE: RESPONSE.redirect('manage_main') @@ -933,25 +1007,35 @@ class MPIWGRoot(ZSQLExtendFolder): RESPONSE.redirect('manage_main') - def getAllMembers(self): - """give list of all members""" - ret=[] +# def getAllMembers(self,reset=None): +# """give list of all members""" +# ret=[] +# +# +# if reset or (getattr(self,'_v_membersList',None) is None): +# for member in self.members._objects: +# if member['meta_type']=='MPIWGStaff': +# memberObj=getattr(self.members,member['id']) +# if memberObj.isPublished(): +# ret.append(memberObj.title.decode('utf-8')) +# +# ret.sort() +# self._v_membersList=ret[0:] +# print ret +# +# return self._v_membersList - #for x in self.members.objectValues('MPIWGStaff'): - #print x.title - # ret.append(x.title.decode('utf-8')) - - for x in self.ZopeFind(self.members,obj_metatypes=['MPIWGStaff']): - if x[1].isPublished(): - ret.append(x[1].title.decode('utf-8')) - - ret.sort() - #print ret - + def getAllMembers(self): + #ret=[] + + results=self.MembersCatalog({'isPublished':True}) + + ret=[", ".join([proj.lastName, proj.firstName]).decode('utf-8') for proj in results] + + ret.sort() return ret - def printAllMembers(self): - """prin""" + """print""" members=self.getAllMembers() ret="" for x in members: @@ -967,7 +1051,7 @@ class MPIWGRoot(ZSQLExtendFolder): return entry - def getTree(self,date=None): + def getTree(self,dep=None,date=None,onlyActive=None): """generate Tree from project list""" returnList=[] @@ -981,12 +1065,18 @@ class MPIWGRoot(ZSQLExtendFolder): #title=project[0].WEB_title title=[project[0].getContent('WEB_title')] #print title - if idNr[0]!="x": - returnList.append((depth,nr,title,project[0])) - + + if idNr[0]=="x": # kompatibilitaet mit alter Konvention, x vor der Nummer macht project inactive + project[0].setActiveFlag(False) + + if (not dep) or (idNr[0]==dep): #falls dep gesetzt ist nur dieses hinzufuegen. + if onlyActive and project[0].isActiveProject(): #nur active projekte + returnList.append((depth,nr,title,project[0])) + elif not onlyActive: + returnList.append((depth,nr,title,project[0])) return returnList - def formatElementsAsList(self,elements): + def formatElementsAsList(self,elements,onlyOneDept=False): """formatiere tree als liste""" actualDepth=0 @@ -1030,9 +1120,10 @@ class MPIWGRoot(ZSQLExtendFolder): ret+="""""") + + imageURLs=[] + imageCaptions=[] + for split in splitted[1:]: + tmp=split.split("
") + #return repr(splitted[1]) + try: + imageURLs.append(tmp[0].split("\"")[1].encode('utf-8')) + except: + imageURLs.append(tmp[0].split("src=")[1].split(" ")[0].encode('utf-8')) + + split2="".join(tmp[1:]) - filename=self.imageURL.split("/")[-1] - #lege neues images object an, mit leerem bild - if self.ZopeFind(self,obj_ids=[filename]): - #existiert das bild schon, dann neueun filenamen - filename="project_image_"+filename - - self.addImage(None,self.imagecap,filename=filename) - #hole die bilddaten aus der url - url=self.absolute_url()+"/"+self.imageURL - #url=self.absolute_url()+"/"+filename + splitted=split2.split("""""") + if len(splitted)>1: + tmp=splitted[1].split("
") + imageCaptions.append(tmp[0].encode('utf-8')) + + + else: + #keine caption + + imageCaptions.append("") + + + - try:#relative url - data=urllib.urlopen(url).read() - except: - try:#absolute - data=urllib.urlopen(self.imageURL).read() + + + + #eintragen: + for imageURL in imageURLs: + filename=imageURL.split("/")[-1] + #lege neues images object an, mit leerem bild + + if self.ZopeFind(self,obj_ids=[filename]): + #existiert das bild schon, dann neuen filenamen + filename="project_image_"+filename + + self.addImage(None,imageCaptions[imageURLs.index(imageURL)],filename=filename) + #hole die bilddaten aus der url + url=self.absolute_url()+"/"+imageURL + #url=self.absolute_url()+"/"+filename + + try:#relative url + data=urllib.urlopen(url).read() except: - zLOG.LOG("MPIWG Project",zLOG.ERROR,"can't open: %s"%url) + try:#absolute + data=urllib.urlopen(self.imageURL).read() + except: + zLOG.LOG("MPIWG Project",zLOG.ERROR,"can't open: %s"%url) - obj=getattr(self,filename) - obj.update_data(data) + obj=getattr(self,filename) + obj.update_data(data) if RESPONSE: RESPONSE.redirect('manageImages') @@ -1565,6 +1890,17 @@ class MPIWGProject(CatalogAware,Folder): if RESPONSE: RESPONSE.redirect('manageImages') + + def hasChildren(self,date=None,onlyActive=True): + """check if project has children""" + ct=self.getContexts(childs=self.getContent('xdata_05'), + depth=1,date=date,onlyActive=onlyActive) + + if ct and len(ct)>0: + return True + else: + return False + def addImage(self,fileHd,caption,RESPONSE=None,filename=None): """add an MPIWG_Project_image""" @@ -1702,7 +2038,7 @@ class MPIWGProject(CatalogAware,Folder): def generateTemplate(self,RESPONSE=None): - """Erzeuge Template für defined fields not_used""" + """Erzeuge Template fuer defined fields not_used""" id="index_html" title=id @@ -1722,6 +2058,8 @@ class MPIWGProject(CatalogAware,Folder): self.creationTime=time.strftime("%Y%m%d%H%M%S",time.localtime())[0:] self.id=id self.title=id + self.isActiveFlag=True #Flag is true is the project is still active, False if accomplished + if argv: for arg in definedFields: try: @@ -1742,6 +2080,14 @@ class MPIWGProject(CatalogAware,Folder): ) + def isActiveProject(self): + """check if the project is still active, default is true, set to false is the project is accomplished""" + return getattr(self,'isActiveFlag',True) + + def setActiveFlag(self,status=True): + """set the active flag""" + self.isActiveFlag=status + def checkDate(self,date): """teste ob zum Zeitpunkt date eine andere version existierte""" @@ -1782,7 +2128,109 @@ class MPIWGProject(CatalogAware,Folder): """warnung: project noch nicht existent""" pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','no_project')).__of__(self) return pt() - + + def getGetNeighbourhood(self,wordStr, length=100,tagging=True): + """finde umgebung um die worte in wordStr, zurueckgegeben wird eine Array mit den Umgebungen von Fundstellen der Worte + alle Tags werden entfernt, die Fundstellen werden mit XX getaggt, die Umgebungen werden + case insensitive gesucht + @param wordStr: string mit Worten getrennt durch Leerzeichen, Phrasen sind mit " gekennzeichnet + "eine phrase", "*" bezeichnet wildcards und wird ignoriert" + @param length: optional, default wert 100, 2*length ist die groesse der Umgebung + @param tagging: optional default wert true, kein span tag wird erzweugt falls tag=false + """ + + ret=[] # nimmt das Array auf, dass spaeter zurueckgegeben wird + ranges=[] #Array mit tupeln x,y wobei x die Position des Anfang und y des Endes der i-ten Umgebung angiebt + + def isInRanges(nr,length): + """test ob eine gegeben Position nr schon irgendwo in einer Umgebung ist, gibt den Index des ersten Wertes aus ranges zurueck, + -1, wenn kein Treffer + + @param nr: Position die geprueft werden soll + @param length: Laenge des Wortes das geprueft werden soll + """ + for x in ranges: + if (x[0]<=nr) and (nr < (x[1]-length)): + return ranges.index(x) + return -1 + + # deal with phrases, in Phrasen werden die Leerzeichen durch "_" ersetzt. + def rep_empty(str): + x= re.sub(" ","_",str.group(0)) + return re.sub("\"","",x) + + wordStr=re.sub("\".*?\"", rep_empty,wordStr)#ersetze leerzeichen in " " durch "_" und loesche " + + #deal with wildcards, for our purposes it is enough to delete the wildcard + wordStr=wordStr.replace("*","") + + words=wordStr.split(" ") + #if not words is ListType: + # words=[words] + + txt=self.harvest_page() + if not txt: + return ret + txt=re.sub("<.*?>", "", txt) # loesche alle Tags + for word in words: + word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " + pos=0 + + n=txt.lower().count(word.lower()) # wie oft tritt das Wort auf + + for i in range(n): + pos=txt.lower().find(word.lower(),pos) + + if pos > 0: + x=max(0,pos-length) + y=min(len(txt),pos+length) + + + #is word already in one of the results + nr=isInRanges(pos,len(word)) + if nr >=0:# word ist in einer schon gefunden Umgebung, dann vergroessere diese + x=min(ranges[nr][0],x) + y=max(ranges[nr][1],y) + + str=txt[x:y] + + if nr >=0: # word ist in einer schon gefunden Umgebung + ranges[nr]=(x,y) # neue Position der Umgebung + + ret[nr]=str # neue Umgebung + else: # andernfalls neue Umgebung hinzufuegen + ranges.append((x,y)) + + ret.append(str) + + pos=pos+len(word) + else: + break; + + # now highlight everything + if tagging: + for x in range(len(ret)): + for word in words: + repl=re.compile(word,re.IGNORECASE) + ret[x]=repl.sub(""" %s"""%word.upper(),ret[x]) + + return ret + + def harvest_page(self,context=None): + """seite fuer harvesting fuer die Projektsuche""" + if not context: + context=self + + if self.isActiveProject() and self.isActual(): + ext=getattr(self,"harvest_main",None) + if ext: + return getattr(self,ext.getId())() + + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','harvest_main')).__of__(context) + + + return pt() + def index_html(self,request=True,context=None): """show homepage""" if not context: @@ -1884,7 +2332,7 @@ class MPIWGProject(CatalogAware,Folder): else: text5=text2 - #teste ob WEB_project_description und keine führenden p tags + #teste ob WEB_project_description und keine fuehrenden p tags if (len(text5)>4) and (not text5[0:3]=='') and (field=='WEB_project_description'): text5= "
"+text5+"
" @@ -2011,6 +2459,12 @@ class MPIWGProject(CatalogAware,Folder): setattr(self,x,[self.REQUEST[x].decode('utf-8')]) + self.en.changeHistoricalNames(self.getId(),self.REQUEST['historicalNames'].split("\n")) + + if self.REQUEST.has_key('active'): + self.setActiveFlag(True) + else: + self.setActiveFlag(False) if fromEdit and (RESPONSE is not None): RESPONSE.redirect('./editMPIWGBasisEditor')