--- MPIWGWeb/Attic/MPIWGRoot.py 2010/09/22 16:06:09 1.1.2.29 +++ MPIWGWeb/Attic/MPIWGRoot.py 2012/01/09 07:33:30 1.1.2.34 @@ -21,15 +21,15 @@ from bibliography import * import time import xml.dom.minidom import sys -from Ft.Xml.XPath import Evaluate -from Ft.Xml.XPath.Context import Context -from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print -from Ft.Xml import EMPTY_NAMESPACE +#from Ft.Xml.XPath import Evaluate +#from Ft.Xml.XPath.Context import Context +#from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print +#from Ft.Xml import EMPTY_NAMESPACE import copy import updatePersonalWWW import MPIWGStaff from MPIWGHelper import * - +from BeautifulSoup import BeautifulSoup, Comment def sortWeight(x,y): x1=int(getattr(x[1],'weight','0')) @@ -119,10 +119,19 @@ class MPIWGRoot(ZSQLExtendFolder): #if not words is ListType: # words=[words] - txt=obj.harvest_page() + txt=obj.harvest_page(mode="slim") if not txt: return ret - txt=re.sub("<.*?>", "", txt) # loesche alle Tags + + soup = BeautifulSoup(txt) + + comments = soup.findAll(text=lambda text:isinstance(text, Comment)) + [comment.extract() for comment in comments] + + txt = ''.join(soup.findAll(text=True)) + + + #txt=re.sub("<.*?>", "", txt) # loesche alle Tags for word in words: word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " pos=0 @@ -144,6 +153,13 @@ class MPIWGRoot(ZSQLExtendFolder): y=max(ranges[nr][1],y) str=txt[x:y] + if x!=0: #add dots if in the middle of text + str="..."+str + + if y!=len(txt): #add dots if in the middle of text + str=str+"..." + + if nr >=0: # word ist in einer schon gefunden Umgebung ranges[nr]=(x,y) # neue Position der Umgebung @@ -600,15 +616,18 @@ class MPIWGRoot(ZSQLExtendFolder): pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMPIWGRootForm')).__of__(self) return pt() - def changeMPIWGRoot(self,title,disciplineList,themesList,connection_id,lang=None,RESPONSE=None): + def changeMPIWGRoot(self,title,connection_id,coneServiceURL,lang=None,autocommit=None,RESPONSE=None): """change""" self.title=title self.connection_id=connection_id - self.disciplineList=disciplineList - self.themesList=themesList + #self.disciplineList=disciplineList + #self.themesList=themesList + self.coneServiceURL=coneServiceURL + if lang is not None: self.lang = lang + self.autocommit = (autocommit == "on") if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -791,7 +810,7 @@ class MPIWGRoot(ZSQLExtendFolder): if ignoreEntries is None: ignoreEntries = [] - ignoreEntries.append('current_work') # TODO:updatecurrent work + #ignoreEntries.append('current_work') # TODO:updatecurrent work logging.debug("updatePersonEntry: data=%s ignoreEntries=%s"%(repr(data),repr(ignoreEntries))) if data['date_to']=="": # wenn date_to leer @@ -883,7 +902,7 @@ class MPIWGRoot(ZSQLExtendFolder): ret+="" for iE in displayIgnored: ret+=""%(iE[0].encode('utf-8'),iE[1].encode('utf-8'),iE[2].encode('utf-8')) - ret+="" + ret+="
%s%s%s
" self.updatePersonEntry(resultSet[conflict],ignoreEntries=ignoreEntries) @@ -1501,6 +1520,19 @@ class MPIWGRoot(ZSQLExtendFolder): return "done" + def sortResults(self,results): + """search the catalog and give results back sorted by meta_type""" + ret = {} + logging.debug(results()) + for result in results(): + metaType = result.meta_type + resultList= ret.get(metaType,[]) + resultList.append(result) + ret[metaType]=resultList + + logging.debug(ret) + return ret + def manage_addMPIWGRootForm(self): """form for adding the root"""