--- MPIWGWeb/MPIWGProjects.py 2006/07/27 09:01:00 1.47.2.55 +++ MPIWGWeb/MPIWGProjects.py 2006/09/14 18:28:27 1.47.2.60 @@ -33,6 +33,7 @@ from Ft.Xml.XPath import Evaluate from Ft.Xml.XPath.Context import Context from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print from Ft.Xml import EMPTY_NAMESPACE +import psycopg definedFields=['WEB_title','xdata_01','xdata_02','xdata_03','xdata_04','xdata_05','xdata_06','xdata_07','xdata_08','xdata_09','xdata_10','xdata_11','xdata_12','xdata_13','WEB_project_header','WEB_project_description','WEB_related_pub'] @@ -47,6 +48,30 @@ def getTextFromNode(nodename): rc = rc + node.data return rc + +def sortStopWordsF(self,xo,yo): + if not hasattr(self,'_v_stopWords'): + self._v_stopWords=self.stopwords_en.data.split("\n") + + x=str(xo[1]) + y=str(yo[1]) + + strx=x.split(" ") + stry=y.split(" ") + + for tmp in strx: + if tmp.lower() in self._v_stopWords: + del strx[strx.index(tmp)] + + for tmp in stry: + if tmp.lower() in self._v_stopWords: + del stry[stry.index(tmp)] + + return cmp(" ".join(strx)," ".join(stry)) + +def sortStopWords(self): + return lambda x,y : sortStopWordsF(self,x,y) + def sortF(x,y): try: return cmp(x[1],y[1]) @@ -270,6 +295,23 @@ class MPIWGRoot(ZSQLExtendFolder): folders=['MPIWGProject','Folder','ECHO_Navigation'] meta_type='MPIWGRoot' + def harvestProjects(self): + """harvest""" + folder="/tmp" + try: + os.mkdir("/tmp/harvest_MPIWG") + except: + pass + founds=self.ZopeFind(self.aq_parent.projects,obj_metatypes=['MPIWGProject'],search_sub=1) + for found in founds: + txt=found[1].harvest_page() + + if txt and (txt != ""): + name=found[0].replace("/","_") + fh=file("/tmp/harvest_MPIWG/"+name,"w") + fh.write(txt) + fh.close() + def decode(self,str): """decoder""" if not str: @@ -353,7 +395,23 @@ class MPIWGRoot(ZSQLExtendFolder): return True else: return False - + + def subNavStatic(self,obj): + """subnav" von self""" + def sortWeight(x,y): + x1=int(getattr(x[1],'weight','0')) + y1=int(getattr(y[1],'weight','0')) + return cmp(x1,y1) + + subs=self.ZopeFind(obj,obj_metatypes=['MPIWGTemplate','MPIWGLink']) + subret=[] + + for x in subs: + if not(x[1].title==""): + subret.append(x) + subret.sort(sortWeight) + return subret + def subNav(self,obj): """return subnav elemente""" def sortWeight(x,y): @@ -368,12 +426,14 @@ class MPIWGRoot(ZSQLExtendFolder): #suche die zweite ebene + if not obj.aq_parent.getId() in ['de','en']: obj=obj.aq_parent - + while not self.ZopeFind(self,obj_ids=[obj.getId()]): obj=obj.aq_parent + if hasattr(self,obj.getId()): subs=self.ZopeFind(getattr(self,obj.getId()),obj_metatypes=['MPIWGTemplate','MPIWGLink']) @@ -502,6 +562,22 @@ class MPIWGRoot(ZSQLExtendFolder): self.id=id self.title=title + def removeStopWords(self,xo): + """remove stop words from xo""" + if not hasattr(self,'_v_stopWords'): + self._v_stopWords=self.stopwords_en.data.split("\n") + + x=str(xo) + + strx=x.split(" ") + + for tmp in strx: + + if tmp.lower() in self._v_stopWords: + del strx[strx.index(tmp)] + + return " ".join(strx) + def urlQuote(self,str): """quote""" return urllib.quote(str) @@ -700,6 +776,10 @@ class MPIWGRoot(ZSQLExtendFolder): if sort=="int": ret.sort(sortI) + elif sort=="stopWords": + + ret.sort(sortStopWords(self)) + else: ret.sort(sortF) @@ -997,7 +1077,7 @@ class MPIWGRoot(ZSQLExtendFolder): return entry - def getTree(self,date=None,onlyActive=None): + def getTree(self,dep=None,date=None,onlyActive=None): """generate Tree from project list""" returnList=[] @@ -1015,13 +1095,14 @@ class MPIWGRoot(ZSQLExtendFolder): if idNr[0]=="x": # kompatibilitŠt mit alter Konvention, x vor der Nummer macht project inactive project[0].setActiveFlag(False) - if onlyActive and project[0].isActiveProject(): #nur active projekte - returnList.append((depth,nr,title,project[0])) - elif not onlyActive: - returnList.append((depth,nr,title,project[0])) + if (not dep) or (idNr[0]==dep): #falls dep gesetzt ist nur dieses hinzufŸgen. + if onlyActive and project[0].isActiveProject(): #nur active projekte + returnList.append((depth,nr,title,project[0])) + elif not onlyActive: + returnList.append((depth,nr,title,project[0])) return returnList - def formatElementsAsList(self,elements): + def formatElementsAsList(self,elements,onlyOneDept=False): """formatiere tree als liste""" actualDepth=0 @@ -1067,7 +1148,8 @@ class MPIWGRoot(ZSQLExtendFolder): if actualDepth==1: departmentName={'1':'Department I','2':'Department II','3':'Department III', '4':'Ind. Research Group','5':'Ind. Research Group','6':'Ind. Research Group','7':'Research Network'} department=element[3].getContent('xdata_05') - ret+="""
%s: """%(department,departmentName[department]) + if not onlyOneDept: + ret+="""
%s: """%(department,departmentName[department]) if self.REQUEST.has_key('date'): ret+="""%s"""%(self.generateUrlProject(element[3].absolute_url())+"/index.html",self.REQUEST['date'],element[3].getContent('WEB_title')) @@ -1365,7 +1447,61 @@ class MPIWGRoot(ZSQLExtendFolder): return (fieldname in checkFields) - + + def generateNameIndex(self): + """erzeuge einen index verwendeter personen""" + o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0) + results={} + print self.fulltext.historicalNames.items() + for nameItem in self.fulltext.historicalNames.items(): #gehe durch alle namen des lexikons + + c = o.cursor() + name=nameItem[0] + print "check",name + c.execute("select lastname,firstname from persons where lower(lastname) = '%s'"%quote(name)) + tmpres=c.fetchall() + firstnames=[result[1] for result in tmpres] # find all firstnames + if tmpres: + lastname=tmpres[0][0] + + for found in self.fulltext({'names':name}): + if found.getObject().isActual(): + for nh in found.getObject().getGetNeighbourhood(name, length=50,tagging=False): #hole umgebung + #schaue nun ob der vorname hinter oder vor dem name ist + position=nh.find(lastname) + # vorher + #print "NH",nh + bevorS=nh[0:position].split() + #print "BV",bevorS + if len(bevorS)>1: + try: + bevor=[bevorS[-1],bevorS[-2]] + except: + bevor=[bevorS[0]] + else: + bevor=[] + #nachher + behindS= re.split("[,|;| ]",nh[position:]) + #print "BH",behindS + if len(behindS)>2: + try: + behind=behindS[1:3] + except: + behind=[bevorS[1]] + else: + behind=[] + for firstname in firstnames: + if firstname in bevor+behind: #Namen wie mit Adelspraedikaten werden so erstmal nich gefunden + id="%s,%s"%(lastname,firstname) + if not results.has_key(id): + results[id]=[] + objId=found.getObject().getId() + if not (objId in results[id]): + print "added %s for %s"%(id,objId) + results[id].append(objId) + self.nameIndex=results + return results + def manage_addMPIWGRootForm(self): """form for adding the root""" pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addMPIWGRootForm.zpt')).__of__(self) @@ -1433,6 +1569,7 @@ class MPIWGProject(CatalogAware,Folder): meta_type='MPIWGProject' default_catalog='ProjectCatalog' + def sortedByPlace(self,metatype): """find metatype and sort by place""" def sort(x,y): @@ -1650,7 +1787,7 @@ class MPIWGProject(CatalogAware,Folder): if RESPONSE: RESPONSE.redirect('manageImages') - + def hasChildren(self,date=None,onlyActive=True): """check if project has children""" ct=self.getContexts(childs=self.getContent('xdata_05'), @@ -1889,13 +2026,14 @@ class MPIWGProject(CatalogAware,Folder): pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','no_project')).__of__(self) return pt() - def getGetNeighbourhood(self,wordStr, length=100): + def getGetNeighbourhood(self,wordStr, length=100,tagging=True): """finde umgebung um die worte in wordStr, zurŸckgegeben wird eine Array mit den Umgebungen von Fundstellen der Worte alle Tags werden entfernt, die Fundstellen werden mit XX getaggt, die Umgebungen werden case insensitive gesucht @param wordStr: string mit Worten getrennt durch Leerzeichen, Phrasen sind mit " gekennzeichnet "eine phrase", "*" bezeichnet wildcards und wird ignoriert" @param length: optional, default wert 100, 2*length ist die grš§e der Umgebung + @param tagging: optional default wert true, kein span tag wird erzweugt falls tag=false """ ret=[] # nimmt das Array auf, dass spŠter zurŸckgegeben wird @@ -1928,7 +2066,8 @@ class MPIWGProject(CatalogAware,Folder): # words=[words] txt=self.harvest_page() - + if not txt: + return ret txt=re.sub("<.*?>", "", txt) # loesche alle Tags for word in words: word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " @@ -1966,10 +2105,11 @@ class MPIWGProject(CatalogAware,Folder): break; # now highlight everything - for x in range(len(ret)): - for word in words: - repl=re.compile(word,re.IGNORECASE) - ret[x]=repl.sub(""" %s"""%word.upper(),ret[x]) + if tagging: + for x in range(len(ret)): + for word in words: + repl=re.compile(word,re.IGNORECASE) + ret[x]=repl.sub(""" %s"""%word.upper(),ret[x]) return ret