--- MPIWGWeb/Attic/MPIWGRoot.py 2010/09/22 16:06:09 1.1.2.29 +++ MPIWGWeb/Attic/MPIWGRoot.py 2012/01/09 10:36:48 1.1.2.35 @@ -21,15 +21,17 @@ from bibliography import * import time import xml.dom.minidom import sys -from Ft.Xml.XPath import Evaluate -from Ft.Xml.XPath.Context import Context -from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print -from Ft.Xml import EMPTY_NAMESPACE +#from Ft.Xml.XPath import Evaluate +#from Ft.Xml.XPath.Context import Context +#from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print +#from Ft.Xml import EMPTY_NAMESPACE import copy import updatePersonalWWW import MPIWGStaff from MPIWGHelper import * - +from BeautifulSoup import BeautifulSoup, Comment +from ZODB import FileStorage, DB +from ZEO import ClientStorage def sortWeight(x,y): x1=int(getattr(x[1],'weight','0')) @@ -40,6 +42,7 @@ def sortWeight(x,y): class MPIWGRoot(ZSQLExtendFolder): """Stammordner fuer den Web-Server""" + _v_harvestCache=None meta_type='MPIWGRoot' fieldLabels={'WEB_title':'WEB_Title', @@ -78,6 +81,35 @@ class MPIWGRoot(ZSQLExtendFolder): ) + def getHarvestCachePort(self): + return getattr(self,"harvestPort",29999) + + def getHarvestCacheServer(self): + return getattr(self,"harvestServer","localhost") + + + def getHarvestCache(self): + logging.debug("CACHE:"+repr(self._v_harvestCache)) + if self._v_harvestCache==None: + #storage = FileStorage.FileStorage('/tmp/'+self.getId()+'test-filestorage.fs') + addr = self.getHarvestCacheServer(), self.getHarvestCachePort() + storage = ClientStorage.ClientStorage(addr) + db = DB(storage) + self._v_harvestDV=db + conn = db.open() + dbroot = conn.root() + if not dbroot.has_key('templates'): + from BTrees.OOBTree import OOBTree + dbroot['templates'] = OOBTree() + + self._v_harvestCache = dbroot['templates'] + logging.debug("CACHE2:"+repr(self._v_harvestCache)) + return self._v_harvestCache + + def __del__(self): + if self._v_harvestCache!=None: + self._v_harvestDV.close(); + def getGetNeighbourhood(self,obj, wordStr, length=100,tagging=True): """finde umgebung um die worte in wordStr, zurueckgegeben wird eine Array mit den Umgebungen von Fundstellen der Worte alle Tags werden entfernt, die Fundstellen werden mit XX getaggt, die Umgebungen werden @@ -118,11 +150,29 @@ class MPIWGRoot(ZSQLExtendFolder): words=wordStr.split(" ") #if not words is ListType: # words=[words] + + + txtCache = self.en.getHarvestCache(); + txt= txtCache.get(obj.absolute_url(),None) + + if txt==None: - txt=obj.harvest_page() + logging.debug("NO CACHE for: "+obj.absolute_url()) + txt=obj.harvest_page(mode="slim") + + if not txt: return ret - txt=re.sub("<.*?>", "", txt) # loesche alle Tags + + soup = BeautifulSoup(txt) + + comments = soup.findAll(text=lambda text:isinstance(text, Comment)) + [comment.extract() for comment in comments] + + txt = ''.join(soup.findAll(text=True)) + + + #txt=re.sub("<.*?>", "", txt) # loesche alle Tags for word in words: word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " pos=0 @@ -144,6 +194,13 @@ class MPIWGRoot(ZSQLExtendFolder): y=max(ranges[nr][1],y) str=txt[x:y] + if x!=0: #add dots if in the middle of text + str="..."+str + + if y!=len(txt): #add dots if in the middle of text + str=str+"..." + + if nr >=0: # word ist in einer schon gefunden Umgebung ranges[nr]=(x,y) # neue Position der Umgebung @@ -600,15 +657,23 @@ class MPIWGRoot(ZSQLExtendFolder): pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMPIWGRootForm')).__of__(self) return pt() - def changeMPIWGRoot(self,title,disciplineList,themesList,connection_id,lang=None,RESPONSE=None): + def changeMPIWGRoot(self,title,connection_id,coneServiceURL,harvestPort,harvestServer,lang=None,autocommit=None,RESPONSE=None): """change""" self.title=title self.connection_id=connection_id - self.disciplineList=disciplineList - self.themesList=themesList + #self.disciplineList=disciplineList + #self.themesList=themesList + self.coneServiceURL=coneServiceURL + self.harvestServer=harvestServer + try: + self.harvestPort=int(harvestPort) + except: + logging.error("couldn't change port!: no number:"+harvestPort) + if lang is not None: self.lang = lang + self.autocommit = (autocommit == "on") if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -791,7 +856,7 @@ class MPIWGRoot(ZSQLExtendFolder): if ignoreEntries is None: ignoreEntries = [] - ignoreEntries.append('current_work') # TODO:updatecurrent work + #ignoreEntries.append('current_work') # TODO:updatecurrent work logging.debug("updatePersonEntry: data=%s ignoreEntries=%s"%(repr(data),repr(ignoreEntries))) if data['date_to']=="": # wenn date_to leer @@ -883,7 +948,7 @@ class MPIWGRoot(ZSQLExtendFolder): ret+="" for iE in displayIgnored: ret+=""%(iE[0].encode('utf-8'),iE[1].encode('utf-8'),iE[2].encode('utf-8')) - ret+="" + ret+="
%s%s%s
" self.updatePersonEntry(resultSet[conflict],ignoreEntries=ignoreEntries) @@ -955,9 +1020,10 @@ class MPIWGRoot(ZSQLExtendFolder): logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.WARNING," %s %s"%sys.exc_info()[:2]) - - + + + if RESPONSE: RESPONSE.redirect('manage_main') @@ -1256,11 +1322,12 @@ class MPIWGRoot(ZSQLExtendFolder): ret=[] if key: + logging.debug("MPIWGROOT (getProjectsOfMember):"+key) proj=self.ProjectCatalog({'getPersonKeyList':utf8ify(key)}) else: return ret # key muss definiert sein - + logging.debug("MPIWGROOT (getProjectsOfMember):"+repr(proj)) if proj: proj2=[] for x in proj: @@ -1501,6 +1568,19 @@ class MPIWGRoot(ZSQLExtendFolder): return "done" + def sortResults(self,results): + """search the catalog and give results back sorted by meta_type""" + ret = {} + logging.debug(results()) + for result in results(): + metaType = result.meta_type + resultList= ret.get(metaType,[]) + resultList.append(result) + ret[metaType]=resultList + + logging.debug(ret) + return ret + def manage_addMPIWGRootForm(self): """form for adding the root"""