--- MPIWGWeb/Attic/MPIWGRoot.py 2010/07/15 10:20:17 1.1.2.28 +++ MPIWGWeb/Attic/MPIWGRoot.py 2012/12/07 10:12:56 1.1.2.42 @@ -21,15 +21,19 @@ from bibliography import * import time import xml.dom.minidom import sys -from Ft.Xml.XPath import Evaluate -from Ft.Xml.XPath.Context import Context -from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print -from Ft.Xml import EMPTY_NAMESPACE +import transaction + +#from Ft.Xml.XPath import Evaluate +#from Ft.Xml.XPath.Context import Context +#from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print +#from Ft.Xml import EMPTY_NAMESPACE import copy import updatePersonalWWW import MPIWGStaff from MPIWGHelper import * - +from BeautifulSoup import BeautifulSoup, Comment +from ZODB import FileStorage, DB +from ZEO import ClientStorage def sortWeight(x,y): x1=int(getattr(x[1],'weight','0')) @@ -40,6 +44,7 @@ def sortWeight(x,y): class MPIWGRoot(ZSQLExtendFolder): """Stammordner fuer den Web-Server""" + _v_harvestCache=None meta_type='MPIWGRoot' fieldLabels={'WEB_title':'WEB_Title', @@ -67,6 +72,49 @@ class MPIWGRoot(ZSQLExtendFolder): # types of objects that show up in navigation nav_meta_types = ['MPIWGTemplate','MPIWGLink','MPIWGFolder'] + manage_options = Folder.manage_options+( + {'label':'Update personal homepages','action':'updatePersonalwww_html'}, + {'label':'Reindex catalogs','action':'reindexCatalogs'}, + {'label':'Main config','action':'changeMPIWGRootForm'}, + {'label':'add e-mails','action':'showNewDBEntries'}, + #{'label':'update the institutsbibliography','action':'updateInstitutsbiliography'}, + #{'label':'Edit Historical Persons','action':'editHistoricalPersonsForm'}, + #{'label':'Store Historical Persons','action':'storeHistoricalPersons'}, + ) + + + def getHarvestCachePort(self): + return getattr(self,"harvestPort",29999) + + def getHarvestCacheServer(self): + return getattr(self,"harvestServer","localhost") + + + def getHarvestCache(self): + logging.debug("CACHE:"+repr(self._v_harvestCache)) + if self._v_harvestCache==None: + #storage = FileStorage.FileStorage('/tmp/'+self.getId()+'test-filestorage.fs') + addr = self.getHarvestCacheServer(), self.getHarvestCachePort() + storage = ClientStorage.ClientStorage(addr) + db = DB(storage) + self._v_harvestDV=db + self._v_harvestDV=db + conn = db.open() + dbroot = conn.root() + if not dbroot.has_key('templates'): + from BTrees.OOBTree import OOBTree + dbroot['templates'] = OOBTree() + + self._v_harvestCache = dbroot['templates'] + logging.debug("CACHE2:"+repr(self._v_harvestCache)) + return self._v_harvestCache + + + + def __del__(self): + if self._v_harvestCache!=None: + self._v_harvestDV.close(); + def getGetNeighbourhood(self,obj, wordStr, length=100,tagging=True): """finde umgebung um die worte in wordStr, zurueckgegeben wird eine Array mit den Umgebungen von Fundstellen der Worte alle Tags werden entfernt, die Fundstellen werden mit XX getaggt, die Umgebungen werden @@ -107,11 +155,29 @@ class MPIWGRoot(ZSQLExtendFolder): words=wordStr.split(" ") #if not words is ListType: # words=[words] + + + txtCache = self.en.getHarvestCache(); + txt= txtCache.get(obj.absolute_url(),None) + + if txt==None: - txt=obj.harvest_page() + logging.debug("NO CACHE for: "+obj.absolute_url()) + txt=obj.harvest_page(mode="slim") + + if not txt: return ret - txt=re.sub("<.*?>", "", txt) # loesche alle Tags + + soup = BeautifulSoup(txt) + + comments = soup.findAll(text=lambda text:isinstance(text, Comment)) + [comment.extract() for comment in comments] + + txt = ''.join(soup.findAll(text=True)) + + + #txt=re.sub("<.*?>", "", txt) # loesche alle Tags for word in words: word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " pos=0 @@ -133,6 +199,13 @@ class MPIWGRoot(ZSQLExtendFolder): y=max(ranges[nr][1],y) str=txt[x:y] + if x!=0: #add dots if in the middle of text + str="..."+str + + if y!=len(txt): #add dots if in the middle of text + str=str+"..." + + if nr >=0: # word ist in einer schon gefunden Umgebung ranges[nr]=(x,y) # neue Position der Umgebung @@ -323,6 +396,9 @@ class MPIWGRoot(ZSQLExtendFolder): return base def isNewCapital(self,text=None,reset=None): + + if text: + text=text.upper() if reset: self.REQUEST['capital']="A" return True @@ -589,15 +665,23 @@ class MPIWGRoot(ZSQLExtendFolder): pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeMPIWGRootForm')).__of__(self) return pt() - def changeMPIWGRoot(self,title,disciplineList,themesList,connection_id,lang=None,RESPONSE=None): + def changeMPIWGRoot(self,title,connection_id,coneServiceURL,harvestPort,harvestServer,lang=None,autocommit=None,RESPONSE=None): """change""" self.title=title self.connection_id=connection_id - self.disciplineList=disciplineList - self.themesList=themesList + #self.disciplineList=disciplineList + #self.themesList=themesList + self.coneServiceURL=coneServiceURL + self.harvestServer=harvestServer + try: + self.harvestPort=int(harvestPort) + except: + logging.error("couldn't change port!: no number:"+harvestPort) + if lang is not None: self.lang = lang + self.autocommit = (autocommit == "on") if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -638,7 +722,49 @@ class MPIWGRoot(ZSQLExtendFolder): return ret + + def getAllProjectsAndTagsAsCSV(self,archived=1,RESPONSE=None): + """alle projekte auch die nicht getaggten""" + retList=[] + headers=['projectId','sortingNumber','projectName','scholars','startedAt','completedAt','lastChangeThesaurusAt','lastChangeProjectAt','projectCreatedAt','persons','places','objects'] + headers.extend(list(self.thesaurus.tags.keys())) + retList.append("\t".join(headers)) + if not hasattr(self,'thesaurus'): + return "NON thesaurus (there have to be a MPIWGthesaurus object, with object ID thesaurus)" + + projectTags = self.thesaurus.getProjectsAndTags() + for project in self.getProjectFields('WEB_title_or_short'): + proj = project[0] + p_name = project[1] + retProj=[] + #if (not proj.isArchivedProject() and archived==1) or (proj.isArchivedProject() and archived==2): + retProj.append(self.utf8ify(proj.getId())) + retProj.append(self.utf8ify(proj.getContent('xdata_05'))) + retProj.append(self.utf8ify(p_name)) + retProj.append(self.utf8ify(proj.getContent('xdata_01'))) + retProj.append(self.utf8ify(proj.getStartedAt())) + retProj.append(self.utf8ify(proj.getCompletedAt())) + changeDate=self.thesaurus.lastChangeInThesaurus.get(proj.getId(),'') + n = re.sub("[:\- ]","",str(changeDate)) + retProj.append(n) + retProj.append(self.utf8ify(getattr(proj,'creationTime','20050101000000'))) + retProj.append("")#TODO: project created at + retProj.append(";".join([person[1] for person in self.thesaurus.getPersonsFromProject(proj.getId())])) + retProj.append(";".join([person[1] for person in self.thesaurus.getHistoricalPlacesFromProject(proj.getId())])) + retProj.append(";".join([person[1] for person in self.thesaurus.getObjectsFromProject(proj.getId())])) + retProj+=self.thesaurus.getTags(proj.getId(),projectTags) + retList.append("\t".join(retProj)) + + if RESPONSE: + + RESPONSE.setHeader('Content-Disposition','attachment; filename="ProjectsAndTags.tsv"') + RESPONSE.setHeader('Content-Type', "application/octet-stream") + + return "\n".join(retList); + + + def getProjectFields(self,fieldName,date=None,folder=None,sort=None): """getListofFieldNames""" ret=[] @@ -683,17 +809,6 @@ class MPIWGRoot(ZSQLExtendFolder): return projects - manage_options = Folder.manage_options+( - {'label':'Update personal homepages','action':'updatePersonalwww_html'}, - {'label':'Reindex catalogs','action':'reindexCatalogs'}, - {'label':'Main config','action':'changeMPIWGRootForm'}, - {'label':'add e-mails','action':'showNewDBEntries'}, - {'label':'update the institutsbibliography','action':'updateInstitutsbiliography'}, - #{'label':'Edit Historical Persons','action':'editHistoricalPersonsForm'}, - #{'label':'Store Historical Persons','action':'storeHistoricalPersons'}, - ) - - def updatePublicationDB(self,personId=None): """updates the publication db, i.e. copy year and type into the main table""" @@ -791,7 +906,7 @@ class MPIWGRoot(ZSQLExtendFolder): if ignoreEntries is None: ignoreEntries = [] - ignoreEntries.append('current_work') # TODO:updatecurrent work + #ignoreEntries.append('current_work') # TODO:updatecurrent work logging.debug("updatePersonEntry: data=%s ignoreEntries=%s"%(repr(data),repr(ignoreEntries))) if data['date_to']=="": # wenn date_to leer @@ -883,7 +998,7 @@ class MPIWGRoot(ZSQLExtendFolder): ret+="" for iE in displayIgnored: ret+=""%(iE[0].encode('utf-8'),iE[1].encode('utf-8'),iE[2].encode('utf-8')) - ret+="" + ret+="
%s%s%s
" self.updatePersonEntry(resultSet[conflict],ignoreEntries=ignoreEntries) @@ -955,9 +1070,10 @@ class MPIWGRoot(ZSQLExtendFolder): logger("MPIWG Root (reindexCatalog: fulltextProjectsMembers)",logging.WARNING," %s %s"%sys.exc_info()[:2]) - - + + + if RESPONSE: RESPONSE.redirect('manage_main') @@ -968,7 +1084,7 @@ class MPIWGRoot(ZSQLExtendFolder): #ret=[] def sorter(x,y): - return cmp(x[0],y[0]) + return cmp(x[0].lower(),y[0].lower()) results=self.MembersCatalog({'isPublished':True}) @@ -1229,10 +1345,10 @@ class MPIWGRoot(ZSQLExtendFolder): """give tuple member /projects""" ret=[] members=self.getAllMembers() - logging.error("X %s"%repr(members)) + logging.debug("X %s"%repr(members)) #return str(members) for x in members: - logging.error("X %s"%repr(x)) + #logging.debug("X %s"%repr(x)) projects=self.getProjectsOfMember(key=x[1],date=date) if len(projects)>0: ret.append((x[0],projects)) @@ -1256,11 +1372,12 @@ class MPIWGRoot(ZSQLExtendFolder): ret=[] if key: + logging.debug("MPIWGROOT (getProjectsOfMember):"+key) proj=self.ProjectCatalog({'getPersonKeyList':utf8ify(key)}) else: return ret # key muss definiert sein - + #logging.debug("MPIWGROOT (getProjectsOfMember):"+repr(proj)) if proj: proj2=[] for x in proj: @@ -1501,6 +1618,19 @@ class MPIWGRoot(ZSQLExtendFolder): return "done" + def sortResults(self,results): + """search the catalog and give results back sorted by meta_type""" + ret = {} + logging.debug(results()) + for result in results(): + metaType = result.meta_type + resultList= ret.get(metaType,[]) + resultList.append(result) + ret[metaType]=resultList + + logging.debug(ret) + return ret + def manage_addMPIWGRootForm(self): """form for adding the root"""