def sortStopWordsF(self,xo,yo):
    """Compare two entries by their second element, ignoring stop words.

    Old-style comparison function (negative / zero / positive result).

    @param self: object providing ``stopwords_en.data``, a newline separated
                 list of stop words; the parsed list is cached on the object
                 as ``_v_stopWords``
    @param xo: sequence whose element [1] is the text to compare
    @param yo: sequence whose element [1] is the text to compare
    @return: -1, 0 or 1, like the Python 2 builtin ``cmp``
    """
    if not hasattr(self,'_v_stopWords'):
        # parse the stop word list only once per object
        self._v_stopWords=self.stopwords_en.data.split("\n")
    stopWords=self._v_stopWords

    def withoutStopWords(entry):
        # Build a new list instead of deleting from the list being iterated:
        # deleting in place skipped the word following every removed word,
        # so two stop words in a row left the second one in the sort key.
        words=str(entry[1]).split(" ")
        return " ".join([word for word in words if word.lower() not in stopWords])

    left=withoutStopWords(xo)
    right=withoutStopWords(yo)
    # same result as cmp(left,right), spelled without the cmp builtin
    return (left>right)-(left<right)


def sortStopWords(self):
    """Return a two-argument comparison function bound to self.

    The returned callable forwards to sortStopWordsF(self,x,y), so it can be
    handed to list.sort() as comparison function.
    """
    return lambda x,y : sortStopWordsF(self,x,y)
def harvestProjects(self):
    """Write the harvested text of every project to /tmp/harvest_MPIWG.

    All objects of meta type 'MPIWGProject' below ``self.aq_parent.projects``
    are visited; for each one with a non-empty harvest_page() result a file
    named after the found id (with "/" replaced by "_") is written.
    """
    folder=os.path.join("/tmp","harvest_MPIWG")
    try:
        os.mkdir(folder)
    except OSError:
        # Usually: directory exists already. If it could not be created for
        # another reason the open() below fails with a clear error.
        pass

    founds=self.ZopeFind(self.aq_parent.projects,obj_metatypes=['MPIWGProject'],search_sub=1)
    for found in founds:
        txt=found[1].harvest_page()
        if not txt:
            continue

        name=found[0].replace("/","_")
        fh=open(os.path.join(folder,name),"w")
        try:
            fh.write(txt)
        finally:
            # close the handle even if the write fails
            fh.close()


def browserCheck(self):
    """Inspect the User-Agent header of the current request.

    @return: dictionary with the keys 'ua' (raw header value, may be None),
             'isIE', 'isN4', 'isMac', 'isWin', 'isIEWin', 'isIEMac',
             'staticHTML' (always False) and, only if a version could be
             parsed for Internet Explorer, 'versIE'
    """
    bt = {}
    ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    bt['ua'] = ua
    # a request without User-Agent header must not break the page
    uaText = ua or ""

    bt['isIE'] = uaText.find('MSIE') > -1
    # Netscape 4 is only assumed if the browser is not Internet Explorer
    bt['isN4'] = (not bt['isIE']) and (uaText.find('Mozilla/4.') > -1)

    # second "; " separated token inside the parentheses, e.g. "MSIE 6.0"
    parts = uaText[uaText.find('('):].split("; ")
    if len(parts) > 1 and parts[1].find("MSIE") > -1:
        tokens = parts[1].split(" ")
        if len(tokens) > 1:
            bt['versIE'] = tokens[1]

    bt['isMac'] = uaText.find('Macintosh') > -1
    bt['isWin'] = uaText.find('Windows') > -1
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt


def getSection(self):
    """Return the name of the current section.

    The section is the first path element of the request URL below
    absolute_url().

    @return: the section name, or None if the URL has no path element below
             the root or if that element contains a "." (e.g. "index.html")
    """
    root = self.absolute_url()
    url = self.REQUEST['URL']
    paths = url.replace(root, '').split('/')
    # split() always yields at least one element, so the index must be
    # checked against 1, otherwise a request for the root itself fails
    if len(paths) > 1:
        sec = paths[1]
        if sec.find('.') < 0:
            return sec
    return None


def getSectionStyle(self, name, style=""):
    """Return style + '-sel' if the current section equals name, else style."""
    if self.getSection() == name:
        return style + '-sel'
    return style


def MPIWGrootURL(self):
    """Return the URL of the root, i.e. absolute_url() of this object."""
    return self.absolute_url()


def removeStopWords(self,xo):
    """Return str(xo) with all stop words removed.

    Words are separated by single blanks and compared in lower case against
    the newline separated list in ``stopwords_en.data``; the parsed list is
    cached on the object as ``_v_stopWords``.
    """
    if not hasattr(self,'_v_stopWords'):
        self._v_stopWords=self.stopwords_en.data.split("\n")
    stopWords=self._v_stopWords

    # Filter into a new list: deleting from the list while iterating over it
    # skipped the word after each removed word.
    words=str(xo).split(" ")
    return " ".join([word for word in words if word.lower() not in stopWords])
storeHistoricalPersons(self,RESPONSE=None): - """store persons""" - self.personDict={} - personDict=self.harvestHistoricalPersons() - for person in personDict.keys(): - for project in personDict[person][1]: - if person in self.personDict.keys(): - self.personDict[person][1].append((project.absolute_url(),project.WEB_title[0],project.xdata_01[0])) - else: - self.personDict[person]=(personDict[person][0],[(project.absolute_url(),project.WEB_title[0],project.xdata_01[0])]) - - if RESPONSE is not None: - RESPONSE.redirect("showHistoricalPersons") - - - def getPersonDict(self,name): - """name von dict""" - - try: - return self.personDict[name][0].encode('utf-8') - except: - return self.personDict[name][0] - return self.personDict[name][0].decode('latin-1').encode('utf-8') - def showHistoricalPersons(self): - """show persons""" - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','showHistoricalPersons')).__of__(self) - return pt() - - - def editHistoricalPersonsForm(self): - """edit historical persons for consistency""" - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','editHistoricalPersonsForm')).__of__(self) - return pt() - def getProjectsByFieldContent(self,fieldName,fieldContentsEntry, date=None): """gib alle Projekte aus mit Value von field mit fieldName enthält ein Element der Liste fieldContents""" def sort(x,y): @@ -718,6 +750,10 @@ class MPIWGRoot(ZSQLExtendFolder): if sort=="int": ret.sort(sortI) + elif sort=="stopWords": + + ret.sort(sortStopWords(self)) + else: ret.sort(sortF) @@ -800,7 +836,7 @@ class MPIWGRoot(ZSQLExtendFolder): RESPONSE.write("\n") RESPONSE.write("Update Institutsbibliography\n") ret=self.upDateSQL('personalwww.xml') - RESPONSE.write("done Insitutsbibliography:%s\n"%ret) + RESPONSE.write("done Insitutsbibliography:%s\n"%ret) url="http://itgroup.mpiwg-berlin.mpg.de:8050/FMPro?-db=personal-www&-format=-dso_xml&-lay=sql_export&-max=20000&-findall" dom = NonvalidatingReader.parseUri(url) #fh=urllib.urlopen(url) @@ 
-1015,7 +1051,7 @@ class MPIWGRoot(ZSQLExtendFolder): return entry - def getTree(self,date=None,onlyActive=None): + def getTree(self,dep=None,date=None,onlyActive=None): """generate Tree from project list""" returnList=[] @@ -1033,13 +1069,14 @@ class MPIWGRoot(ZSQLExtendFolder): if idNr[0]=="x": # kompatibilitŠt mit alter Konvention, x vor der Nummer macht project inactive project[0].setActiveFlag(False) - if onlyActive and project[0].isActiveProject(): #nur active projekte - returnList.append((depth,nr,title,project[0])) - elif not onlyActive: - returnList.append((depth,nr,title,project[0])) + if (not dep) or (idNr[0]==dep): #falls dep gesetzt ist nur dieses hinzufŸgen. + if onlyActive and project[0].isActiveProject(): #nur active projekte + returnList.append((depth,nr,title,project[0])) + elif not onlyActive: + returnList.append((depth,nr,title,project[0])) return returnList - def formatElementsAsList(self,elements): + def formatElementsAsList(self,elements,onlyOneDept=False): """formatiere tree als liste""" actualDepth=0 @@ -1085,7 +1122,8 @@ class MPIWGRoot(ZSQLExtendFolder): if actualDepth==1: departmentName={'1':'Department I','2':'Department II','3':'Department III', '4':'Ind. Research Group','5':'Ind. Research Group','6':'Ind. Research Group','7':'Research Network'} department=element[3].getContent('xdata_05') - ret+="""
%s: """%(department,departmentName[department]) + if not onlyOneDept: + ret+="""
def generateNameIndex(self):
    """Build an index of the persons mentioned in the project texts.

    For every name in ``self.fulltext.historicalNames`` the matching persons
    (lastname, firstname) are read from the 'authorities' database. A person
    counts as mentioned in a project if one of the two words before or after
    the last name in a text neighbourhood equals the first name.

    The result is stored as ``self.nameIndex`` and returned.

    @return: dictionary "lastname,firstname" -> list of project ids
    """
    import psycopg
    # NOTE(review): database credentials are hard coded here; they should
    # come from configuration.
    o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0)
    results={}
    try:
        c = o.cursor()
        for nameItem in self.fulltext.historicalNames.items(): # all names of the lexicon
            name=nameItem[0]
            # let the driver quote the value instead of building the SQL
            # string by hand
            c.execute("select lastname,firstname from persons where lower(lastname) = %s",(name,))
            tmpres=c.fetchall()
            if not tmpres:
                # unknown name: no first names to look for (and no last name
                # to search with)
                continue
            lastname=tmpres[0][0]
            firstnames=[result[1] for result in tmpres] # all first names

            for found in self.fulltext({'names':name}):
                obj=found.getObject()
                if not obj.isActual():
                    continue
                objId=obj.getId()
                for nh in obj.getGetNeighbourhood(name, length=50,tagging=False): # text around the hit
                    position=nh.find(lastname)
                    if position<0:
                        # last name not found in this spelling, nothing to
                        # compare against
                        continue
                    # up to two words before and up to two words after the
                    # last name; slicing also covers the case that only one
                    # word is available
                    bevor=nh[0:position].split()[-2:]
                    behind=re.split("[,|;| ]",nh[position:])[1:3]
                    candidates=bevor+behind
                    for firstname in firstnames:
                        # names with nobility particles are not found this way
                        if firstname in candidates:
                            key="%s,%s"%(lastname,firstname)
                            projects=results.setdefault(key,[])
                            if objId not in projects:
                                projects.append(objId)
    finally:
        o.close()

    self.nameIndex=results
    return results


def editNameIndexHTML(self):
    """Show the page for editing the name index.

    If no edited index exists yet, it is created as an independent copy of
    the automatically generated ``nameIndex``.
    """
    if not hasattr(self,'nameIndexEdited'):
        # deep copy: the project lists must not be shared with nameIndex,
        # otherwise editing would silently change the generated index too
        self.nameIndexEdited=copy.deepcopy(self.nameIndex)
    pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','editHistoricalNames.zpt')).__of__(self)
    return pt()


def getNamesInProject(self,projectId):
    """Return all names of nameIndexEdited which reference projectId."""
    index=self.nameIndexEdited
    return [name for name in index.keys() if projectId in index[name]]


def editNameIndex(self,RESPONSE=None,name=None,occurrances=None,submit=None):
    """Edit one entry of the name index.

    @param RESPONSE: optional; if given, redirects to 'editNameIndexHTML'
    @param name: key of the entry in nameIndexEdited
    @param occurrances: project ids, separated by newlines
    @param submit: "delete" removes the entry and remembers the given ids in
                   ``deletedHistoricalNames``, "change" replaces the ids of
                   the entry, "add" appends ids (creating the entry if needed)
    """
    nI=self.nameIndexEdited
    if submit=="delete":
        dh=getattr(self,'deletedHistoricalNames',{})
        if isinstance(dh,list):
            # Earlier versions overwrote the dictionary with a plain list of
            # ids; such a value cannot be assigned to a name, start again.
            dh={}
        # record the removed ids under the name
        dh.setdefault(name,[]).extend(occurrances.split("\n"))
        self.deletedHistoricalNames=dh

        del nI[name]

    elif submit=="change":
        nI[name]=occurrances.split("\n")

    elif submit=="add":
        nI.setdefault(name,[]).extend(occurrances.split("\n"))

    # assign again so that the change of the dictionary is noticed by the ZODB
    self.nameIndexEdited=nI

    if RESPONSE is not None:
        RESPONSE.redirect('editNameIndexHTML')


def restoreIndex(self):
    """Reset nameIndexEdited to the state of the generated nameIndex.

    @return: the string "done"
    """
    # independent copy, otherwise later edits would modify nameIndex itself
    self.nameIndexEdited=copy.deepcopy(self.nameIndex)
    return "done"


def changeHistoricalNames(self,projId,nameListTmp):
    """Make nameIndexEdited agree with the names mentioned in one project.

    @param projId: id of the project
    @param nameListTmp: list of the names mentioned in the project;
                        surrounding whitespace is ignored, empty entries
                        are skipped
    """
    # strip blanks and line ends; an empty form field must not create an
    # entry for the empty name
    nameList=[x.strip() for x in nameListTmp]
    nameList=[x for x in nameList if x]

    nI=self.nameIndexEdited
    for name in nameList:
        if name not in nI: # new name
            nI[name]=[projId]

    for name in list(nI.keys()):
        projects=nI[name]
        if name in nameList:
            if projId not in projects: # not yet in the list -> add
                projects.append(projId)
        else:
            # The project no longer mentions this name: drop every reference.
            # Filtering replaces remove() inside the loop over the same list,
            # which missed a directly following duplicate.
            projects[:]=[pr for pr in projects if pr!=projId]

    # assign again so that the change of the dictionary is noticed by the ZODB
    self.nameIndexEdited=nI
string mit Worten getrennt durch Leerzeichen, Phrasen sind mit " gekennzeichnet "eine phrase", "*" bezeichnet wildcards und wird ignoriert" @param length: optional, default wert 100, 2*length ist die grš§e der Umgebung + @param tagging: optional default wert true, kein span tag wird erzweugt falls tag=false """ ret=[] # nimmt das Array auf, dass spŠter zurŸckgegeben wird @@ -1946,7 +2139,8 @@ class MPIWGProject(CatalogAware,Folder): # words=[words] txt=self.harvest_page() - + if not txt: + return ret txt=re.sub("<.*?>", "", txt) # loesche alle Tags for word in words: word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " @@ -1984,10 +2178,11 @@ class MPIWGProject(CatalogAware,Folder): break; # now highlight everything - for x in range(len(ret)): - for word in words: - repl=re.compile(word,re.IGNORECASE) - ret[x]=repl.sub(""" %s"""%word.upper(),ret[x]) + if tagging: + for x in range(len(ret)): + for word in words: + repl=re.compile(word,re.IGNORECASE) + ret[x]=repl.sub(""" %s"""%word.upper(),ret[x]) return ret @@ -2234,6 +2429,8 @@ class MPIWGProject(CatalogAware,Folder): setattr(self,x,[self.REQUEST[x].decode('utf-8')]) + self.en.changeHistoricalNames(self.getId(),self.REQUEST['historicalNames'].split("\n")) + if self.REQUEST.has_key('active'): self.setActiveFlag(True) else: @@ -2374,4 +2571,4 @@ def manage_addMPIWGProject(self,id,RESPO if RESPONSE is not None: - RESPONSE.redirect('manage_main') \ No newline at end of file + RESPONSE.redirect('manage_main')