"""CDLI extensions of the filearchive""" from Products.versionedFile.extVersionedFile import * from Products.ZCatalog.CatalogPathAwareness import CatalogAware import os.path import os import urlparse import urllib import cgi from OFS.OrderedFolder import OrderedFolder from OFS.SimpleItem import SimpleItem import time from OFS.Folder import manage_addFolder import re from AccessControl import ClassSecurityInfo from Acquisition import Implicit from Globals import Persistent from threading import Thread from ZPublisher.HTTPRequest import HTTPRequest from ZPublisher.HTTPResponse import HTTPResponse from ZPublisher.BaseRequest import RequestContainer import threading import logging import transaction import copy import codecs import sys from BTrees.IOBTree import IOBTree import cdliSplitter from sets import Set import md5 from DownloadBasket import DownloadBasketFinallyThread from types import * import pickle import tempfile from cdli_helpers import * class CDLIFileObject(CatalogAware,extVersionedFileObject): """CDLI file object""" meta_type="CDLI File Object" default_catalog='CDLIObjectsCatalog' security=ClassSecurityInfo() security.declareProtected('manage','index_html') security.declarePublic('view') view = PageTemplateFile('zpt/viewCDLIFile.zpt', globals()) security.declarePublic('editATF') editATF = PageTemplateFile('zpt/editATFFile.zpt', globals()) def PrincipiaSearchSource(self): """Return cataloguable key for ourselves.""" return str(self) def setAuthor(self, author): """change the author""" self.author = author def makeThisVersionCurrent_html(self): """form for mthis version current""" pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','makeThisVersionCurrent.zpt')).__of__(self) return pt() security.declarePublic('makeThisVersionCurrent') def makeThisVersionCurrent(self,comment,author,RESPONSE=None): """copy this version to current""" parent=self.aq_parent parent.manage_addVersionedFileObject(id=None,vC=comment,author=author,file=self.getData(),RESPONSE=RESPONSE) #newversion=parent.manage_addCDLIFileObject('',comment,author) #newversion.manage_upload(self.getData()) #if RESPONSE is not None: # RESPONSE.redirect(self.aq_parent.absolute_url()+'/history') return True def getFormattedData(self): """fromat text""" data=self.getData() # return re.sub("\s\#lem"," #lem",data) #remove return vor #lem return re.sub("#lem"," #lem",data) #remove return vor #lem security.declarePublic('getPNumber') def getPNumber(self): """get the pnumber""" try: txt=re.match("&[Pp](\d*)\s*=([^\r\n]*)",self.getData()[0:]) except: txt=self.getData()[0:] return "ERROR" try: return "P"+txt.group(1) except: return "ERROR" security.declarePublic('getDesignation') def getDesignation(self): """get the designation out of the file""" try: txt=re.match("&[Pp](\d*)\s*=([^\r\n]*)",self.getData()[0:]) except: txt=self.getData()[0:] return "ERROR" try: return txt.group(2) except: return "ERROR" manage_addCDLIFileObjectForm=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject', version='1') def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',versionNumber=0, precondition='', content_type='', from_tmp=False,REQUEST=None): """Add a new File object. Creates a new File object 'id' with the contents of 'file'""" id=str(id) title=str(title) content_type=str(content_type) precondition=str(precondition) id, title = cookId(id, title, file) self=self.this() # First, we create the file without data: self._setObject(id, CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=vC,time=time.localtime(),author=author)) fob = self._getOb(id) # Now we "upload" the data. By doing this in two steps, we # can use a database trick to make the upload more efficient. if file and not from_tmp: fob.manage_upload(file) elif file and from_tmp: fob.manage_file_upload(file) # manage_upload_from_tmp doesn't exist in ExtFile2 # fob.manage_upload_from_tmp(file) # manage_upload_from_tmp doesn't exist in ExtFile2 if content_type: fob.content_type=content_type #logging.debug("manage_add: lastversion=%s"%self.getData()) logging.debug("reindex1: %s in %s"%(repr(self),repr(self.default_catalog))) self.reindex_object() #logging.debug("manage_add: fob_data=%s"%fob.getData()) logging.debug("reindex2: %s in %s"%(repr(fob), repr(fob.default_catalog))) fob.index_object() self.CDLIRoot.updateOrAddToFileBTree(ob) if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main') class CDLIFile(extVersionedFile,CatalogAware): """CDLI file""" security=ClassSecurityInfo() meta_type="CDLI file" content_meta_type = ["CDLI File Object"] default_catalog='CDLICatalog' security.declareProtected('manage','index_html') def getLastVersionData(self): """get last version data""" return self.getData() def getLastVersionFormattedData(self): """get last version data""" return self.getContentObject().getFormattedData() def getTextId(self): """returns P-number of text""" # assuming that its the beginning of the title return self.title[:7] #security.declarePublic('history') def history(self): """history""" ext=self.ZopeFind(self.aq_parent,obj_ids=["history_template.html"]) if ext: return getattr(self,ext[0][1].getId())() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','versionHistory')).__of__(self) return pt() def getBasketFromId(self,basketid, context=None): """get basket from id""" if not context: context=self for basket in self.ZopeFind(context,obj_metatypes=["CDLIBasket"]): if basket[0]==basketid: return basket[1] else: None def isContainedInBaskets(self,context=None): """check is this file is part of any basket @param context: (optional) necessessary if CDLIBasketCatalog is not an (inherited) attribute of self, context.CDLIBasketCatalog has to exist. """ if not context: context=self ret=[] for x in context.CDLIBasketCatalog.search({'getFileNamesInLastVersion':self.getId()}): #if the basket x is deleted it seemes to be that x is sometimes still in the Catalog, why? try: ret.append(x.getObject()) except: pass return ret #return [x.getObject() for x in context.CDLIBasketCatalog.search({'getFileNamesInLastVersion':self.getId()})] def _newContentObject(self, id, title='', versionNumber=0, versionComment=None, time=None, author=None): """factory for content objects. to be overridden in derived classes.""" logging.debug("_newContentObject(CDLI)") return CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=versionComment,time=time,author=author) def addCDLIFileObjectForm(self): """add a new version""" if str(self.REQUEST['AUTHENTICATED_USER']) in ["Anonymous User"]: return "please login first" if (self.lockedBy==self.REQUEST['AUTHENTICATED_USER']) or (self.lockedBy==''): out=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject',version=self.getVersion()).__of__(self) return out() else: return "Sorry file is locked by somebody else" def manage_addCDLIFileObject(self,id,vC,author, file='',title='', precondition='', content_type='', changeName='no',newName='', come_from=None, from_tmp=False,RESPONSE=None): """add""" try: #TODO: der ganze vC unsinn muss ueberarbeitet werden vC=self.REQUEST['vC'] except: pass ob = self.addContentObject(id, vC, author, file, title, changeName=changeName, newName=newName, from_tmp=from_tmp, precondition=precondition, content_type=content_type) try: #FIXME: wozu ist das gut? self.REQUEST.SESSION['objID_parent']=self.getId() except: pass #self.cdliRoot.updateOrAddToFileBTree(self)# now update the object in the cache if RESPONSE: if ob.getSize()==0: self.REQUEST.SESSION['objID']=ob.getId() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','errorUploadFile')).__of__(self) return pt() else: if come_from and (come_from!=""): RESPONSE.redirect(come_from+"?change="+self.getId()) else: RESPONSE.redirect(self.REQUEST['URL2']+'?uploaded=%s'%self.title) else: return ob def manage_addCDLIFileForm(self): """interface for adding the OSAS_root""" pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addCDLIFile.zpt')).__of__(self) return pt() def manage_addCDLIFile(self,id,title,lockedBy, author=None, RESPONSE=None): """add the OSAS_root""" newObj=CDLIFile(id,title,lockedBy,author) tryToggle=True tryCount=0 self._setObject(id,newObj) getattr(self,id).reindex_object() if RESPONSE is not None: RESPONSE.redirect('manage_main') def checkUTF8(data): """check utf 8""" try: data.encode('utf-8') return True except: return False def checkFile(filename,data,folder): """check the files""" # first check the file name fn=filename.split(".") # no extension if not fn[0][0]=="P": return False,"P missing in the filename" elif len(fn[0])!=7: return False,"P number has not the right length 6" elif not checkUTF8(data): return False,"not utf-8" else: return True,"" def splitatf(fh,dir=None,ext=None): """split it""" ret=None nf=None i=0 #ROC: why split \n first and then \r??? if (type(fh) is StringType) or (type(fh) is UnicodeType): iter=fh.split("\n") else: iter=fh.readlines() for lineTmp in iter: lineTmp=lineTmp.replace(codecs.BOM_UTF8,'') # make sure that all BOM are removed.. for line in lineTmp.split("\r"): #logging.log("Deal with: %s"%line) if ext: i+=1 if (i%100)==0: ext.result+="." if i==10000: i=0 ext.result+="
" #check if basket name is in the first line if line.find("#atf basket")>=0: #old convention ret=line.replace('#atf basket ','') ret=ret.split('_')[0] elif line.find("#basket:")>=0: #new convention ret=line.replace('#basket: ','') ret=ret.split('_')[0] else: if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile if nf: nf.close() #close last file filename=line[1:].split("=")[0].rstrip()+".atf" if dir: filename=os.path.join(dir,filename) nf=file(filename,"w") logging.info("open %s"%filename) if nf: nf.write(line.replace("\n","")+"\n") try: nf.close() except: pass if not((type(fh) is StringType) or (type(fh) is UnicodeType)): fh.close() return ret,len(os.listdir(dir)) class CDLIFileFolder(extVersionedFileFolder): """CDLI File Folder""" security=ClassSecurityInfo() meta_type="CDLI Folder" file_meta_type=['CDLI file'] folder_meta_type=['CDLI Folder'] file_catalog='CDLICatalog' #downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert. tmpStore2={} def _newVersionedFile(self, id, title='', lockedBy=None, author=None): """factory for versioned files. to be overridden in derived classes.""" logging.debug("_newVersionedFile(CDLI)") return CDLIFile(id, title, lockedBy=lockedBy, author=author) def setTemp(self,name,value): """set tmp""" setattr(self,name,value) deleteFileForm = PageTemplateFile("zpt/doDeleteFile", globals()) def delete(self,ids,REQUEST=None): """delete these files""" if type(ids) is not ListType: ids=[ids] self.manage_delObjects(ids) if REQUEST is not None: return self.index_html() def getVersionNumbersFromIds(self,ids): """get the numbers of the current versions of documents described by their ids""" ret=[] searchStr=" OR ".join(ids) founds=self.CDLICatalog.search({'title':searchStr}) for found in founds: lastVersion=found.getObject().getContentObject() ret.append((found.getId,lastVersion)) return ret def getFile(self,fn): """get the content of the file fn""" logging.debug("getFile: %s"%repr(fn)) if not self.hasObject(fn): # search deeper founds=getattr(self, self.file_catalog).search({'textid':fn}) if founds: obj=founds[0].getObject().getContentObject() else: return "" else: obj = self[fn].getContentObject() return obj.getData()[0:] def checkCatalog(self,fn): """check if fn is in the catalog""" #TODO add checkCatalog def findObjectsFromListWithVersion(self,list,author=None): """find objects from a list with versions @param list: list of tuples (cdliFile,version) """ #self.REQUEST.SESSION['fileIds']=list#store fieldIds in session for further usage #self.REQUEST.SESSION['searchList']=self.REQUEST.SESSION['fileIds'] pt=getattr(self,'filelistVersioned.html') return pt(search=list,author=author) def getAllPNumbers(self): """get a list of all files (resp their p-numbers) stored""" ret=[x.getId for x in self.CDLICatalog()] return ret def expandFile(self,fileId,fileTree): """wildcard in fileID suche alle Treffer""" founds=self.CDLICatalog({'title':fileId}) for found in founds: fileTree.add(found.getId) logging.debug("ADDD:"+found.getId) def findObjectsFromList(self,enterList=None,display=False,start=None,upload=None,list=None,basketName=None,numberOfObjects=None,RESPONSE=None,REQUEST=None,returnHash=False,hash=None): """findObjectsFromList (, TAB oder LINE separated)""" logging.debug("start: findObjectsFromList") #logging.debug("start: findObjectsFromList"+repr(list)) if upload: # list from file upload txt=upload.read() if enterList: txt=enterList if upload or enterList: txt=txt.replace(",","\n") txt=txt.replace("\t","\n") txt=txt.replace("\r","\n") idsTmp=txt.split("\n") ids=[] for id in idsTmp: # make sure that no empty lines idTmp=id.lstrip().rstrip() if len(idTmp)>0: ids.append(idTmp) #self.REQUEST.SESSION['ids']=" OR ".join(ids) pt=getattr(self,'filelist.html') self.REQUEST.SESSION['searchList']=ids return pt(search=ids) if basketName: #TODO: get rid of one of these.. pt=getattr(self,'filelist.html') return pt(basketName=basketName,numberOfObjects=numberOfObjects) result =self.CDLICache.retrieve(hash) if result: logging.debug("give result from storage2") return hash,result if list is not None: # got already a list logging.debug(" ----List version") ret=[] fileTree=Set() for fileId in list: if fileId.find("*")>-1: #check for wildcards self.expandFile(fileId,fileTree) elif len(fileId.split("."))==1: fileId=fileId+".atf" fileTree.add(fileId) #logging.debug(" -----:"+fileId) #ret+=self.CDLICatalog({'title':fileId}) #x =self.getFileObject(fileId) #if x is not None: # ret.append(x) ids = fileTree & self.v_file_ids #self.REQUEST.SESSION['fileIds']=ids#store fieldIds in session for further usage l=makelist(fileTree)[0:] #logging.debug("l-list:"+repr(l)) self.REQUEST.SESSION['fileIds']=l#store fieldIds in session for further usage self.REQUEST.SESSION['searchList']=l #self.REQUEST.SESSION['searchList']=['P000001.atf'] hash = md5.new(repr(makelist(fileTree))).hexdigest() # erzeuge hash als identification self.REQUEST.SESSION['hash']=hash #TODO: do I need garbage collection for v_tmpStore ? #logging.debug("Hash:"+repr(hash)) # # if hasattr(self.cdliRoot,'v_tmpStore') and self.cdliRoot.v_tmpStore.has_key(hash): # logging.debug("asking for storage") # res=self.cdliRoot.v_tmpStore[hash] # if res: # if returnHash == True: # return hash,res # return res #TODO: get rid of one of these.. #ids=[x.getObject().getId() for x in ret] ret=[(self.getFileObject(x),self.getFileObjectLastVersion(x)) for x in ids] #self.REQUEST.SESSION['fileIds']=ids#store fieldIds in session for further usage #self.REQUEST.SESSION['searchList']=self.REQUEST.SESSION['fileIds'] if display: pt=getattr(self,'filelist.html') return pt(search=ids) else: #self.REQUEST.SESSION['hash'] = ret # store in session #logging.debug("HHHHHHNEU:"+repr(self.makelist(ids))) #logging.debug("HHHHHHNEU:"+repr(hash)) self.CDLICache.store(hash,ret) if returnHash == True: return hash,ret return ret if start: RESPONSE.redirect("filelist.html?start:int="+str(start)) security.declareProtected('Manage','createAllFilesAsSingleFile') def createAllFilesAsSingleFile(self,RESPONSE=None): """download all files""" def sortF(x,y): return cmp(x[0],y[0]) catalog=getattr(self,self.file_catalog) #tf,tfilename=mkstemp() if not hasattr(self.temp_folder,'downloadCounter'): self.temp_folder.downloadCounter=0 if getattr(self.temp_folder,'downloadCounter',0) > 5: return """I am sorry, currently the server has to many requests for downloads, please come back later!""" self.temp_folder.downloadCounter+=1 self._p_changed=1 transaction.get().commit() list=[(x.getId,x) for x in catalog()] list.sort(sortF) RESPONSE.setHeader("Content-Disposition","""attachement; filename=%s"""%"all.atf") RESPONSE.setHeader("Content-Type","application/octet-stream") tmp="" for l in list: obj=l[1].getObject() if obj.meta_type=="CDLI file": #os.write(tf,obj.getLastVersion().data) if RESPONSE: RESPONSE.write(obj.getData()[0:]) RESPONSE.write("\n") self.temp_folder.downloadCounter-=1 self._p_changed=1 transaction.get().commit() #os.close(tf) #RESPONSE.redirect(self.absolute_url()+"/downloadFile?fn="%tfilename) return True def downloadFile(self,fn): """download fn - not used yet""" self.REQUEST.RESPONSE.setHeader("Content-Disposition","""attachement; filename=%s"""%self.getLastVersion().getId()) self.REQUEST.RESPONSE.setHeader("Content-Type","application/octet-stream") self.REQUEST.RESPONSE.write(file(fn).read()) def hasParent(self): """returns true falls subfolder""" if self.aq_parent.meta_type in self.folder_meta_type: return True else: return False def getFolders(self): """get all subfolders""" ret=[] folders=self.ZopeFind(self,obj_metatypes=self.folder_meta_type) for folder in folders: ret.append((folder[1], len(self.ZopeFind(folder[1],obj_metatypes=self.folder_meta_type)), len(self.ZopeFind(folder[1],obj_metatypes=self.file_meta_type)) )) return ret security.declareProtected('manage','index_html') def index_html(self): """main""" ext=self.ZopeFind(self,obj_ids=["index.html"]) if ext: return ext[0][1]() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','CDLIFileFolderMain')).__of__(self) return pt() manage_addCDLIFileFolderForm=DTMLFile('dtml/folderAdd', globals()) def manage_addCDLIFileFolder(self, id, title='', createPublic=0, createUserF=0, REQUEST=None): """Add a new Folder object with id *id*. If the 'createPublic' and 'createUserF' parameters are set to any true value, an 'index_html' and a 'UserFolder' objects are created respectively in the new folder. """ ob=CDLIFileFolder() ob.id=str(id) ob.title=title self._setObject(id, ob) ob=self._getOb(id) checkPermission=getSecurityManager().checkPermission if createUserF: if not checkPermission('Add User Folders', ob): raise Unauthorized, ( 'You are not authorized to add User Folders.' ) ob.manage_addUserFolder() if REQUEST is not None: return self.manage_main(self, REQUEST, update_menu=1) class CDLIRoot(Folder): """main folder for cdli""" meta_type="CDLIRoot" downloadCounterBaskets=0 # counts the current basket downloads if counter > 10 no downloads are possible file_catalog = 'CDLICatalog' # word splitter for search splitter = {'words':cdliSplitter.wordSplitter(), 'graphemes':cdliSplitter.graphemeSplitter()} def unicodify(self,txt): return unicodify(txt) def invalidateOldCacheVersion(self): """loescht die alte Version des Cache""" del self.v_tmpStore return "done" def viewATF(self,id,RESPONSE): """view an Object""" ob = self.CDLICatalog({'title':id}) logging.debug(ob[0].getObject().getLastVersion().absolute_url()+"/view") if len(ob)>0: RESPONSE.redirect(ob[0].getObject().getLastVersion().absolute_url()+"/view") return "not found" def history(self,id,RESPONSE): """view an Object""" ob = self.CDLICatalog({'title':id}) if len(ob)>0: RESPONSE.redirect(ob[0].absolute_url+"/history") return "not found" def downloadLocked(self,id,RESPONSE): """view an Object""" ob = self.CDLICatalog({'title':id}) if len(ob)>0: RESPONSE.redirect(ob[0].absolute_url+"/downloadLocked") return "not found" def download(self,id,RESPONSE): """view an Object""" ob = self.CDLICatalog({'title':id}) if len(ob)>0: RESPONSE.redirect(ob[0].getLastVersion().absolute_url()) return "not found" def addCDLIFileObjectForm(self,id,RESPONSE): """view an Object""" ob = self.CDLICatalog({'title':id}) if len(ob)>0: RESPONSE.redirect(ob[0].absolute_url+"/addCDLIFileObjectForm") return "not found" def addVersionedFileObjectForm(self,id,RESPONSE): """view an Object""" ob = self.CDLICatalog({'title':id}) if len(ob)>0: RESPONSE.redirect(ob[0].absolute_url+"/addVersionedFileObjectForm") return "not found" def unlock(self,id,RESPONSE): """view an Object""" ob = self.CDLICatalog({'title':id}) if len(ob)>0: RESPONSE.redirect(ob[0].absolute_url+"/unlock") return "not found" def getFileObject(self,fileId): """get an object""" x=self.v_files.get(fileId) #logging.debug(x) return x def getFileObjectLastVersion(self,fileId): """get an object""" x=self.v_files_lastVersion.get(fileId) #logging.debug("lastVersion: "+repr(x)) return x def showFileIds(self): """showIds""" return self.v_file_ids def generateFileBTree(self): """erzeuge einen Btree aus allen Files""" self.v_files = OOBTree() self.v_files_lastVersion = OOBTree() self.v_file_ids = Set() for x in self.CDLICatalog.searchResults(): self.v_files.update({x.getId:x.getObject()}) self.v_files_lastVersion.update({x.getId:x.getObject().getLastVersion()}) self.v_file_ids.add(x.getId) logging.debug("add:"+x.getId+"XXX"+repr(x.getObject())) return True def updateOrAddToFileBTree(self,obj): """update a BTree""" self.v_files.update({obj.getId():obj}) self.v_files_lastVersion.update({obj.getId():obj.getLastVersion()}) self.v_file_ids.add(obj.getId()) logging.debug("update:"+obj.getId()+"XXX"+repr(obj)) def deleteFromBTree(self,objId): """delete an obj""" self.v_files.pop(objId) self.v_files_lastVersion.pop(objId) self.v_file_ids.remove(objId) def deleteFiles(self,ids): """delete files""" for id in ids: founds=self.CDLICatalog.search({'title':id.split(".")[0]}) if founds: logging.debug("deleting %s"%founds) folder=founds[0].getObject().aq_parent #get the parent folder of the object logging.debug("deleting from %s"%folder) cut=folder.delete([founds[0].getId]) #cut it out def searchText(self, query, index='graphemes'): """searches query in the fulltext index and returns a list of file ids/P-numbers""" # see also: http://www.plope.com/Books/2_7Edition/SearchingZCatalog.stx#2-13 logging.debug("searchtext for '%s' in index %s"%(query,index)) #import Products.ZCTextIndex.QueryParser #qp = QueryParser.QueryParser() #logging.debug() idxQuery = {index:{'query':query}} idx = getattr(self, self.file_catalog) # do search resultset = idx.search(query_request=idxQuery,sort_index='textid') # put only the P-Number in the result results = [res.getId[:7] for res in resultset] logging.debug("searchtext: found %d texts"%len(results)) return results def getFile(self, pnum): """get the translit file with the given pnum""" f = getattr(self, self.file_catalog).search({'textid':pnum}) if not f: return "" return f[0].getObject().getData() def showFile(self,fileId,wholePage=False): """show a file @param fileId: P-Number of the document to be displayed """ f=getattr(self, self.file_catalog).search({'textid':fileId}) if not f: return "" if wholePage: logging.debug("show whole page") return f[0].getObject().getContentObject().view() else: return f[0].getObject().getLastVersionFormattedData() def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,): """get lines with word from FileId""" logging.debug("showwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) file = formatAtfFullLineNum(self.getFile(fileId)) ret=[] # add whitespace before and whitespace and line-end to splitter bounds expressions bounds = self.splitter[indexName].bounds splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) # clean word expression # TODO: this should use QueryParser itself # take out double quotes word = word.replace('"','') # take out ignorable signs ignorable = self.splitter[indexName].ignorex word = ignorable.sub('', word) # compile into regexp objects and escape parens wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] for line in file.splitlines(): for word in wordlist: #logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignoreable.sub('',line))) if word.search(ignorable.sub('',line)): line = formatAtfLineHtml(line) ret.append(line) break return ret def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False): """ get lines with word from all ids in list FileIds. returns dict with id:lines pairs. """ logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) return dict([(id,self.showWordInFile(id, word, indexName, regExp)) for id in fileIds]) def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False): """get text with word highlighted from FileId""" logging.debug("tagwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) file=self.getFile(fileId) tagStart=u'' tagEnd=u'' tagStr=tagStart + u'%%s' + tagEnd ret=[] # add whitespace to splitter bounds expressions and compile into regexp object bounds = self.splitter[indexName].bounds wordsplit = re.compile("(%s|\s)"%bounds) # clean word expression # TODO: this should use QueryParser itself word = word.replace('"','') # take out double quotes # take out ignoreable signs ignorable = self.splitter[indexName].ignorex word = ignorable.sub('', word) # split search terms by blanks words = word.split(' ') # split search terms again (for grapheme search with words) splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words)) for line in file.splitlines(): line = unicodify(line) # ignore lemma and other lines if line.lstrip().startswith('#lem:'): continue # ignore p-num line if line.startswith('&P'): continue # ignore version lines if line.startswith('#version'): continue # ignore atf type lines if line.startswith('#atf:'): continue # first scan hitwords = [] for w in words: if ignorable.sub('',line).find(w) > -1: # word is in line # append split word for grapheme search with words hitwords.extend(splitwords[w]) #hitwords.extend(wordsplit.split(w)) # examine hits closer if hitwords: # split line into words parts = wordsplit.split(line) line = "" for p in parts: #logging.debug("tagwordinfile: searching for %s in %s"%(p,hitwords)) # reassemble line if ignorable.sub('', p) in hitwords: #logging.debug("tagwordinfile: found %s in %s"%(p,hitwords)) # this part was found line += tagStart + formatAtfHtml(p) + tagEnd else: line += formatAtfHtml(p) else: # no hits line = formatAtfHtml(line) ret.append(line) return u'
\n'.join(ret) def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False): """ get texts with highlighted word from all ids in list FileIds. returns dict with id:text pairs. """ logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) return dict([(id,self.tagWordInFile(id, word, indexName, regExp)) for id in fileIds]) def getFileVersionList(self, pnum): """get the version history as a list for the translit file with the given pnum""" f = getattr(self, self.file_catalog).search({'textid':pnum}) if not f: return [] return f[0].getObject().getVersionList() def URLquote(self,str): """quote url""" return urllib.quote(str) def URLunquote(self,str): """unquote url""" return urllib.unquote(str) def URLquote_plus(self,str): """quote url""" return urllib.quote_plus(str) def URLunquote_plus(self,str): """unquote url""" return urllib.unquote_plus(str) def changeUserForPObjectFromFile(self,user,fname): """aendere user fuer alle p in fiele""" pns = file(fname,"r") for p in pns.readline(): f = getattr(self, self.file_catalog).search({'textid':pnum}) pObj=f.getObject() lv=pObj.getContentObject() logging.info("author:"+lv.author) def forceunlock(self,REQUEST=None,user=None): "break all locks" ret=[] for f in self.ZopeFind(self,obj_metatypes="CDLI file",search_sub=1): un=f[1].forceunlock(user=user) logging.info("check:"+f[0]) if un and un !="": ret.append((f[0],un)) if REQUEST is not None: pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','breakLockResponse.zpt')).__of__(self) return pt(ret=ret) return ret def getLockedFiles(self,REQUEST=None,user=None): """hole alle gesperrten files""" ret={} for f in self.ZopeFind(self,obj_metatypes="CDLI file",search_sub=1): lb = f[1].lockedBy add=False if (lb is not None) and (lb!=""): if user is None: add=True else: if str(lb)==user: add=True if add==True: if not ret.has_key(lb): ret[lb]=[] ret[lb].append(f[0]) if REQUEST is not None: pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','showlockResponse.zpt')).__of__(self) return pt(ret=ret) return ret def getChangesByAuthor(self,author,n=100): """getChangesByAuthor""" zcat=self.CDLIObjectsCatalog res=zcat({'lastEditor':author, 'sort_on':'getTime', 'sort_order':'descending', 'sort_limit':n})[:n ] return res def getChangesByAuthor_html(self,author,n=100): """html output for changes by author""" tmp={} list=[] for x in self.getChangesByAuthor(author): nr=x.getObject().getVersionNumber() id=x.getObject().aq_parent.getId() #hinzufuegen, wenn Version neuer als die if tmp.get(id,(0,0))[1] < nr: tmp[id]=(x.getObject().aq_parent,nr) return self.cdli_main.findObjectsFromListWithVersion(list=tmp.values(),author=author) def getLastChanges(self,n=100): """get the last n changes""" n=int(n) zcat=self.CDLICatalog return zcat({'sort_on':'getLastChangeDate', 'sort_order':'descending', 'sort_limit':n})[:n ] def getLastChanges_html(self,n=100): """get the last n changes""" list = [x.getId for x in self.getLastChanges(n)] return self.cdli_main.findObjectsFromList(list=list,display=True) def refreshTxt(self,txt="",threadName=None): """txt fuer refresh""" return """ 2;url=%s?repeat=%s """%(self.absolute_url()+txt,threadName) def refreshTxtBasket(self,txt="",threadName=None): """txt fuer refresh""" return """ 2;url=%s?repeat=%s """%(txt,threadName) def getResult(self,threadName=None): """result of thread""" try: return self._v_uploadATF[threadName].getResult() except: return "One moment, please" def checkThreads(self): """check threads""" ret="" for thread in threading.enumerate(): ret+="

%s (%s): %s

"%(repr(thread),thread.getName(),thread.isAlive()) return ret def uploadATFRPC(self,data,username): """upload an atffile via xml-rpc""" uploader=uploadATFThread() #generate an random id for the upload object from random import randint if (not self.REQUEST.SESSION.get('idTmp',None)): idTmp=str(randint(0,1000000000)) self.REQUEST.SESSION['idTmp']=idTmp else: idTmp=self.REQUEST.SESSION.get('idTmp',None) uploader.set(data,0,username,idTmp) stObj=uploader.run() processor=uploadATFfinallyThread() basketname=stObj.returnValue['basketNameFromFile'] processor.set("uploadchanged",basketname=basketname,SESSION=stObj.returnValue,username=username,serverport=self.REQUEST['SERVER_PORT']) processor.run() return generateXMLReturn(stObj.returnValue) def uploadATF(self,repeat=None,upload=None,basketId=0,RESPONSE=None): """upload an atf file / basket file""" #self._v_uploadATF.returnValue=None #generate an random id for the upload thread from random import randint if (not self.REQUEST.SESSION.get('idTmp',None)): idTmp=str(randint(0,1000000000)) self.REQUEST.SESSION['idTmp']=idTmp else: idTmp=self.REQUEST.SESSION.get('idTmp',None) threadName=repeat if not threadName or threadName=="": #new thread not called from the waiting page tmpVar=False thread=uploadATFThread() threadName=thread.getName()[0:] if (not hasattr(self,'_v_uploadATF')): self._v_uploadATF={} self._v_uploadATF[threadName]=thread #self._xmltrans.start() #thread=Thread(target=self._v_uploadATF) logging.info("set thread. extern") self._v_uploadATF[threadName].set(upload,basketId,self.REQUEST['AUTHENTICATED_USER'],idTmp,serverport=self.REQUEST['SERVER_PORT']) #thread.start() logging.info("start thread. extern") self._v_uploadATF[threadName].start() self.threadName=self._v_uploadATF[threadName].getName()[0:] wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) if wait_template: return wait_template[0][1]() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadATFWait.zpt')).__of__(self) return pt(txt='/uploadATF',threadName=threadName) #_v_xmltrans.run() else: #recover thread, if lost if (not hasattr(self,'_v_uploadATF')): self._v_uploadATF={} if not self._v_uploadATF.get(threadName,None): for thread in threading.enumerate(): if threadName == thread.getName(): self._v_uploadATF[threadName]=thread if self._v_uploadATF.get(threadName,None) and (not self._v_uploadATF[threadName].returnValue): wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) if wait_template: return wait_template[0][1]() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadATFWait.zpt')).__of__(self) return pt(txt='/uploadATF',threadName=threadName) else: tmp=getattr(self.temp_folder,idTmp).returnValue pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadCheck.zpt')).__of__(self) return pt(changed=tmp['changed'],lockerrors=tmp['lockerrors'],errors=tmp['errors'],dir=tmp['dir'],newPs=tmp['newPs'],basketLen=tmp['basketLen'],numberOfFiles=tmp['numberOfFiles'], basketNameFromId=tmp['basketNameFromId'],basketNameFromFile=tmp['basketNameFromFile'],basketId=tmp['basketId']) def redoUpload(self,threadName): """redo the upload""" tmp=self.cdli_main.tmpStore2[threadName] pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadCheck.zpt')).__of__(self) return pt(changed=tmp['changed'],lockerrors=tmp['lockerrors'],errors=tmp['errors'],dir=tmp['dir'],newPs=tmp['newPs'],basketLen=tmp['basketLen'],numberOfFiles=tmp['numberOfFiles'], basketNameFromId=tmp['basketNameFromId'],basketNameFromFile=tmp['basketNameFromFile'],basketId=tmp['basketId']) def uploadATFfinally(self,procedure='',comment="",basketname='',unlock=None,repeat=None,RESPONSE=None): """nowupload the files""" threadName=repeat if not threadName or threadName=="": thread=uploadATFfinallyThread() threadName=thread.getName()[0:] if (not hasattr(self,'_v_uploadATF')): self._v_uploadATF={} self._v_uploadATF[threadName]=thread idTmp=self.REQUEST.SESSION['idTmp'] stObj=getattr(self.temp_folder,idTmp) self._v_uploadATF[threadName].set(procedure,comment=comment,basketname=basketname,unlock=unlock,SESSION=stObj.returnValue,username=self.REQUEST['AUTHENTICATED_USER'],serverport=self.REQUEST['SERVER_PORT']) self._v_uploadATF[threadName].start() wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) if wait_template: return wait_template[0][1]() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadATFWait.zpt')).__of__(self) return pt(txt='/uploadATFfinally',threadName=threadName) #_v_xmltrans.run() else: #recover thread, if lost if not hasattr(self,'_v_uploadATF'): self._v_uploadATF={} if not self._v_uploadATF.get(threadName,None): for thread in threading.enumerate(): if threadName == thread.getName(): self._v_uploadATF[threadName]=thread if self._v_uploadATF.get(threadName,None) and (self._v_uploadATF[threadName] is not None) and (not self._v_uploadATF[threadName].end) : wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) if wait_template: return wait_template[0][1]() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadATFWait.zpt')).__of__(self) return pt(txt='/uploadATFfinally',threadName=threadName) else: idTmp=self.REQUEST.SESSION['idTmp'] stObj=getattr(self.temp_folder,idTmp) self.REQUEST.SESSION['idTmp']=None #update changed logging.debug("dir:"+repr(stObj.returnValue['changed'])) for x in stObj.returnValue['changed']: ob=self.CDLICatalog.search({'title':x[0]}) try: self.cdliRoot.updateOrAddToFileBTree(ob[0].getObject()) except: logging.error("uploadATFfinally - cannot update Object %s Error: %s %s"%(ob[1],sys.exc_info()[0],sys.exc_info()[1])) if RESPONSE is not None: RESPONSE.redirect(self.absolute_url()) def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None): """import files""" logging.debug("importFiles folderName=%s files=%s ext=%s"%(folderName,files,ext)) root=self.cdli_main count=0 if not files: files=os.listdir(folderName) for f in files: folder=f[0:3] f2=f[0:5] #check if main folder PXX already exists obj=self.ZopeFind(root,obj_ids=[folder]) logging.debug("importFiles: folder=%s f2=%s obj=%s"%(folder,f2,obj)) if ext: ext.result="

adding: %s

"%f+ext.result if not obj: # if not create it manage_addCDLIFileFolder(root,folder,folder) fobj=getattr(root,folder) #transaction.get().commit() else: fobj=obj[0][1] #check IF PYYYYY already exist obj2=fobj.ZopeFind(fobj,obj_ids=[f2]) logging.debug("importFiles: fobj=%s obj2=%s"%(fobj,obj2)) if not obj2:# if not create it manage_addCDLIFileFolder(fobj,f2,f2) fobj2=getattr(fobj,f2) else: fobj2=obj2[0][1] # not add the file file2=os.path.join(folderName,f) id=f logging.debug("importFiles: addCDLIFile fobj2=%s, f=%s file2=%s"%(fobj2,repr(f),repr(file2))) fobj2.addFile(vC='',file=file(file2),author=author,newName=f) count+=1 #now add the file to the storage ob = getattr(fobj2,f) self.cdliRoot.updateOrAddToFileBTree(ob) if count%100==0: logging.debug("importfiles: committing") transaction.get().commit() transaction.get().commit() return "ok" manage_addCDLIRootForm=DTMLFile('dtml/rootAdd', globals()) def manage_addCDLIRoot(self, id, title='', createPublic=0, createUserF=0, REQUEST=None): """Add a new Folder object with id *id*. If the 'createPublic' and 'createUserF' parameters are set to any true value, an 'index_html' and a 'UserFolder' objects are created respectively in the new folder. """ ob=CDLIRoot() ob.id=str(id) ob.title=title try: self._setObject(id, ob) except: pass ob=self._getOb(id) checkPermission=getSecurityManager().checkPermission if createUserF: if not checkPermission('Add User Folders', ob): raise Unauthorized, ( 'You are not authorized to add User Folders.' ) ob.manage_addUserFolder() if REQUEST is not None: return self.manage_main(self, REQUEST, update_menu=1) import cdli_basket # Die folgenden Klassen sollte nicht mehr aus diesem Paket benutzt werden, sondern direkt aus # cdli_basket importiert werden. class uploadATFfinallyThread(cdli_basket.uploadATFfinallyThread): """depricates""" pass class tmpStore(cdli_basket.tmpStore): """depricated""" pass class uploadATFThread(cdli_basket.uploadATFThread): """depricated""" pass class CDLIBasketContainer(cdli_basket.CDLIBasketContainer): """depricated""" pass class CDLIBasket(cdli_basket.CDLIBasket): """depricated""" pass class CDLIBasketVersion(cdli_basket.CDLIBasketVersion): """depricated""" pass class BasketContent(cdli_basket.BasketContent): """depricated""" pass