--- MPIWGWeb/Attic/MPIWGRoot.py 2010/09/22 16:06:09 1.1.2.29 +++ MPIWGWeb/Attic/MPIWGRoot.py 2012/03/13 07:04:24 1.1.2.41 @@ -21,15 +21,19 @@ from bibliography import * import time import xml.dom.minidom import sys -from Ft.Xml.XPath import Evaluate -from Ft.Xml.XPath.Context import Context -from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print -from Ft.Xml import EMPTY_NAMESPACE +import transaction + +#from Ft.Xml.XPath import Evaluate +#from Ft.Xml.XPath.Context import Context +#from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print +#from Ft.Xml import EMPTY_NAMESPACE import copy import updatePersonalWWW import MPIWGStaff from MPIWGHelper import * - +from BeautifulSoup import BeautifulSoup, Comment +from ZODB import FileStorage, DB +from ZEO import ClientStorage def sortWeight(x,y): x1=int(getattr(x[1],'weight','0')) @@ -40,6 +44,7 @@ def sortWeight(x,y): class MPIWGRoot(ZSQLExtendFolder): """Stammordner fuer den Web-Server""" + _v_harvestCache=None meta_type='MPIWGRoot' fieldLabels={'WEB_title':'WEB_Title', @@ -78,6 +83,38 @@ class MPIWGRoot(ZSQLExtendFolder): ) + def getHarvestCachePort(self): + return getattr(self,"harvestPort",29999) + + def getHarvestCacheServer(self): + return getattr(self,"harvestServer","localhost") + + + def getHarvestCache(self): + logging.debug("CACHE:"+repr(self._v_harvestCache)) + if self._v_harvestCache==None: + #storage = FileStorage.FileStorage('/tmp/'+self.getId()+'test-filestorage.fs') + addr = self.getHarvestCacheServer(), self.getHarvestCachePort() + storage = ClientStorage.ClientStorage(addr) + db = DB(storage) + self._v_harvestDV=db + self._v_harvestDV=db + conn = db.open() + dbroot = conn.root() + if not dbroot.has_key('templates'): + from BTrees.OOBTree import OOBTree + dbroot['templates'] = OOBTree() + + self._v_harvestCache = dbroot['templates'] + logging.debug("CACHE2:"+repr(self._v_harvestCache)) + return self._v_harvestCache + + + + def __del__(self): + if self._v_harvestCache!=None: + self._v_harvestDV.close(); + def getGetNeighbourhood(self,obj, wordStr, length=100,tagging=True): """finde umgebung um die worte in wordStr, zurueckgegeben wird eine Array mit den Umgebungen von Fundstellen der Worte alle Tags werden entfernt, die Fundstellen werden mit XX getaggt, die Umgebungen werden @@ -118,11 +155,29 @@ class MPIWGRoot(ZSQLExtendFolder): words=wordStr.split(" ") #if not words is ListType: # words=[words] + + + txtCache = self.en.getHarvestCache(); + txt= txtCache.get(obj.absolute_url(),None) + + if txt==None: - txt=obj.harvest_page() + logging.debug("NO CACHE for: "+obj.absolute_url()) + txt=obj.harvest_page(mode="slim") + + if not txt: return ret - txt=re.sub("<.*?>", "", txt) # loesche alle Tags + + soup = BeautifulSoup(txt) + + comments = soup.findAll(text=lambda text:isinstance(text, Comment)) + [comment.extract() for comment in comments] + + txt = ''.join(soup.findAll(text=True)) + + + #txt=re.sub("<.*?>", "", txt) # loesche alle Tags for word in words: word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " pos=0 @@ -144,6 +199,13 @@ class MPIWGRoot(ZSQLExtendFolder): y=max(ranges[nr][1],y) str=txt[x:y] + if x!=0: #add dots if in the middle of text + str="..."+str + + if y!=len(txt): #add dots if in the middle of text + str=str+"..." + + if nr >=0: # word ist in einer schon gefunden Umgebung ranges[nr]=(x,y) # neue Position der Umgebung @@ -600,15 +662,23 @@ class MPIWGRoot(ZSQLExtendFolder): pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMPIWGRootForm')).__of__(self) return pt() - def changeMPIWGRoot(self,title,disciplineList,themesList,connection_id,lang=None,RESPONSE=None): + def changeMPIWGRoot(self,title,connection_id,coneServiceURL,harvestPort,harvestServer,lang=None,autocommit=None,RESPONSE=None): """change""" self.title=title self.connection_id=connection_id - self.disciplineList=disciplineList - self.themesList=themesList + #self.disciplineList=disciplineList + #self.themesList=themesList + self.coneServiceURL=coneServiceURL + self.harvestServer=harvestServer + try: + self.harvestPort=int(harvestPort) + except: + logging.error("couldn't change port!: no number:"+harvestPort) + if lang is not None: self.lang = lang + self.autocommit = (autocommit == "on") if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -649,7 +719,49 @@ class MPIWGRoot(ZSQLExtendFolder): return ret + + def getAllProjectsAndTagsAsCSV(self,archived=1,RESPONSE=None): + """alle projekte auch die nicht getaggten""" + retList=[] + headers=['projectId','sortingNumber','projectName','scholars','startedAt','completedAt','lastChangeThesaurusAt','lastChangeProjectAt','projectCreatedAt','persons','places','objects'] + headers.extend(list(self.thesaurus.tags.keys())) + retList.append("\t".join(headers)) + if not hasattr(self,'thesaurus'): + return "NON thesaurus (there have to be a MPIWGthesaurus object, with object ID thesaurus)" + + projectTags = self.thesaurus.getProjectsAndTags() + for project in self.getProjectFields('WEB_title_or_short'): + proj = project[0] + p_name = project[1] + retProj=[] + #if (not proj.isArchivedProject() and archived==1) or (proj.isArchivedProject() and archived==2): + retProj.append(self.utf8ify(proj.getId())) + retProj.append(self.utf8ify(proj.getContent('xdata_05'))) + retProj.append(self.utf8ify(p_name)) + retProj.append(self.utf8ify(proj.getContent('xdata_01'))) + retProj.append(self.utf8ify(proj.getStartedAt())) + retProj.append(self.utf8ify(proj.getCompletedAt())) + changeDate=self.thesaurus.lastChangeInThesaurus.get(proj.getId(),'') + n = re.sub("[:\- ]","",str(changeDate)) + retProj.append(n) + retProj.append(self.utf8ify(getattr(proj,'creationTime','20050101000000'))) + retProj.append("")#TODO: project created at + retProj.append(";".join([person[1] for person in self.thesaurus.getPersonsFromProject(proj.getId())])) + retProj.append(";".join([person[1] for person in self.thesaurus.getHistoricalPlacesFromProject(proj.getId())])) + retProj.append(";".join([person[1] for person in self.thesaurus.getObjectsFromProject(proj.getId())])) + retProj+=self.thesaurus.getTags(proj.getId(),projectTags) + retList.append("\t".join(retProj)) + + if RESPONSE: + + RESPONSE.setHeader('Content-Disposition','attachment; filename="ProjectsAndTags.tsv"') + RESPONSE.setHeader('Content-Type', "application/octet-stream") + + return "\n".join(retList); + + + def getProjectFields(self,fieldName,date=None,folder=None,sort=None): """getListofFieldNames""" ret=[] @@ -791,7 +903,7 @@ class MPIWGRoot(ZSQLExtendFolder): if ignoreEntries is None: ignoreEntries = [] - ignoreEntries.append('current_work') # TODO:updatecurrent work + #ignoreEntries.append('current_work') # TODO:updatecurrent work logging.debug("updatePersonEntry: data=%s ignoreEntries=%s"%(repr(data),repr(ignoreEntries))) if data['date_to']=="": # wenn date_to leer @@ -883,7 +995,7 @@ class MPIWGRoot(ZSQLExtendFolder): ret+="" for iE in displayIgnored: ret+=""%(iE[0].encode('utf-8'),iE[1].encode('utf-8'),iE[2].encode('utf-8')) - ret+="" + ret+="
%s%s%s
" self.updatePersonEntry(resultSet[conflict],ignoreEntries=ignoreEntries) @@ -955,9 +1067,10 @@ class MPIWGRoot(ZSQLExtendFolder): logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.WARNING," %s %s"%sys.exc_info()[:2]) - - + + + if RESPONSE: RESPONSE.redirect('manage_main') @@ -1229,10 +1342,10 @@ class MPIWGRoot(ZSQLExtendFolder): """give tuple member /projects""" ret=[] members=self.getAllMembers() - logging.error("X %s"%repr(members)) + logging.debug("X %s"%repr(members)) #return str(members) for x in members: - logging.error("X %s"%repr(x)) + #logging.debug("X %s"%repr(x)) projects=self.getProjectsOfMember(key=x[1],date=date) if len(projects)>0: ret.append((x[0],projects)) @@ -1256,11 +1369,12 @@ class MPIWGRoot(ZSQLExtendFolder): ret=[] if key: + logging.debug("MPIWGROOT (getProjectsOfMember):"+key) proj=self.ProjectCatalog({'getPersonKeyList':utf8ify(key)}) else: return ret # key muss definiert sein - + #logging.debug("MPIWGROOT (getProjectsOfMember):"+repr(proj)) if proj: proj2=[] for x in proj: @@ -1501,6 +1615,19 @@ class MPIWGRoot(ZSQLExtendFolder): return "done" + def sortResults(self,results): + """search the catalog and give results back sorted by meta_type""" + ret = {} + logging.debug(results()) + for result in results(): + metaType = result.meta_type + resultList= ret.get(metaType,[]) + resultList.append(result) + ret[metaType]=resultList + + logging.debug(ret) + return ret + def manage_addMPIWGRootForm(self): """form for adding the root"""