--- MPIWGWeb/Attic/MPIWGRoot.py 2012/01/03 09:26:45 1.1.2.32 +++ MPIWGWeb/Attic/MPIWGRoot.py 2012/01/06 08:23:10 1.1.2.33 @@ -29,7 +29,7 @@ import copy import updatePersonalWWW import MPIWGStaff from MPIWGHelper import * - +from BeautifulSoup import BeautifulSoup, Comment def sortWeight(x,y): x1=int(getattr(x[1],'weight','0')) @@ -122,7 +122,16 @@ class MPIWGRoot(ZSQLExtendFolder): txt=obj.harvest_page() if not txt: return ret - txt=re.sub("<.*?>", "", txt) # loesche alle Tags + + soup = BeautifulSoup(txt) + + comments = soup.findAll(text=lambda text:isinstance(text, Comment)) + [comment.extract() for comment in comments] + + txt = ''.join(soup.findAll(text=True)) + + + #txt=re.sub("<.*?>", "", txt) # loesche alle Tags for word in words: word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " pos=0 @@ -144,6 +153,13 @@ class MPIWGRoot(ZSQLExtendFolder): y=max(ranges[nr][1],y) str=txt[x:y] + if x!=0: #add dots if in the middle of text + str="..."+str + + if y!=len(txt): #add dots if in the middle of text + str=str+"..." + + if nr >=0: # word ist in einer schon gefunden Umgebung ranges[nr]=(x,y) # neue Position der Umgebung @@ -1504,6 +1520,19 @@ class MPIWGRoot(ZSQLExtendFolder): return "done" + def sortResults(self,results): + """search the catalog and give results back sorted by meta_type""" + ret = {} + logging.debug(results()) + for result in results(): + metaType = result.meta_type + resultList= ret.get(metaType,[]) + resultList.append(result) + ret[metaType]=resultList + + logging.debug(ret) + return ret + def manage_addMPIWGRootForm(self): """form for adding the root"""