"""CDLI extensions of the filearchive"""
from Products.versionedFile.extVersionedFile import *
from Products.ZCatalog.CatalogPathAwareness import CatalogAware
import os.path
import os
import urlparse
import urllib
import cgi
from OFS.OrderedFolder import OrderedFolder
from OFS.SimpleItem import SimpleItem
import time
from OFS.Folder import manage_addFolder
import re
from AccessControl import ClassSecurityInfo
from Acquisition import Implicit
from Globals import Persistent
from threading import Thread
from ZPublisher.HTTPRequest import HTTPRequest
from ZPublisher.HTTPResponse import HTTPResponse
from ZPublisher.BaseRequest import RequestContainer
import threading
import logging
import transaction
import copy
import codecs
import sys
from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree
import cdliSplitter
from sets import Set
import md5
from DownloadBasket import DownloadBasketFinallyThread
from types import *
import pickle
import tempfile
from cdli_helpers import *
class CDLIFileObject(CatalogAware,extVersionedFileObject):
    """CDLI file object"""

    meta_type="CDLI File Object"
    default_catalog='CDLIObjectsCatalog'

    security=ClassSecurityInfo()

    security.declareProtected('manage','index_html')

    security.declarePublic('view')
    view = PageTemplateFile('zpt/viewCDLIFile.zpt', globals())

    security.declarePublic('editATF')
    editATF = PageTemplateFile('zpt/editATFFile.zpt', globals())

    def PrincipiaSearchSource(self):
        """Return cataloguable key for ourselves."""
        return str(self)

    def setAuthor(self, author):
        """change the author"""
        self.author = author

    def makeThisVersionCurrent_html(self):
        """form for making this version the current one"""
        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','makeThisVersionCurrent.zpt')).__of__(self)
        return pt()

    security.declarePublic('makeThisVersionCurrent')
    def makeThisVersionCurrent(self,comment,author,RESPONSE=None):
        """copy this version to current

        Adds the data of this object as a new versioned file object to the
        acquisition parent.

        @param comment: version comment passed on as vC
        @param author: author of the new version
        @param RESPONSE: passed through to manage_addVersionedFileObject
        @return: always True
        """
        parent=self.aq_parent
        parent.manage_addVersionedFileObject(id=None,vC=comment,author=author,file=self.getData(),RESPONSE=RESPONSE)
        return True

    def getFormattedData(self):
        """format text: every "#lem" in the data is prefixed with two blanks"""
        data=self.getData()
        return re.sub("#lem","  #lem",data)

    def _matchHeader(self):
        """match the "&P<digits> = <designation>" header at the start of the data

        @return: the match object, or None if the data cannot be read or
                 does not start with such a header
        """
        try:
            return re.match(r"&[Pp](\d*)\s*=([^\r\n]*)",self.getData()[0:])
        except Exception:
            # callers report unreadable data as "ERROR" instead of failing
            return None

    security.declarePublic('getPNumber')
    def getPNumber(self):
        """get the pnumber

        @return: "P" followed by the digits of the header, "ERROR" if there is no header
        """
        found=self._matchHeader()
        if found is None:
            return "ERROR"
        return "P"+found.group(1)

    security.declarePublic('getDesignation')
    def getDesignation(self):
        """get the designation out of the file

        @return: the text after the "=" of the header line, "ERROR" if there is no header
        """
        found=self._matchHeader()
        if found is None:
            return "ERROR"
        return found.group(2)
# add form for CDLIFileObject, rendered from the DTML file 'dtml/fileAdd' with version fixed to '1'
manage_addCDLIFileObjectForm=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject', version='1')
def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',versionNumber=0,
                             precondition='', content_type='',
                             from_tmp=False,REQUEST=None):
    """Add a new File object.

    Creates a new File object 'id' with the contents of 'file'

    @param vC: version comment of the new object
    @param author: author of the new object
    @param versionNumber: version number stored on the new object
    @param from_tmp: if true the data is taken over with manage_file_upload
                     instead of manage_upload
    @param REQUEST: if given, the response is redirected to manage_main
    """
    id=str(id)
    title=str(title)
    content_type=str(content_type)
    precondition=str(precondition)

    id, title = cookId(id, title, file)

    self=self.this()

    # First, we create the file without data:
    self._setObject(id, CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=vC,time=time.localtime(),author=author))
    fob = self._getOb(id)

    # Now we "upload" the data.  By doing this in two steps, we
    # can use a database trick to make the upload more efficient.
    if file:
        if from_tmp:
            # manage_upload_from_tmp doesn't exist in ExtFile2
            fob.manage_file_upload(file)
        else:
            fob.manage_upload(file)

    if content_type:
        fob.content_type=content_type

    logging.debug("reindex1: %s in %s"%(repr(self),repr(self.default_catalog)))
    self.reindex_object()
    logging.debug("reindex2: %s in %s"%(repr(fob), repr(fob.default_catalog)))
    fob.index_object()

    # The original passed the undefined name `ob` here (NameError).
    # updateOrAddToFileBTree() calls getLastVersion() on its argument, so it
    # gets the versioned file itself.
    # NOTE(review): confirm that the container is the intended argument.
    self.CDLIRoot.updateOrAddToFileBTree(self)

    if REQUEST is not None:
        REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')
class CDLIFile(extVersionedFile,CatalogAware):
    """CDLI file"""

    security=ClassSecurityInfo()
    meta_type="CDLI file"
    content_meta_type = ["CDLI File Object"]

    default_catalog='CDLICatalog'

    security.declareProtected('manage','index_html')

    def getLastVersionData(self):
        """get last version data"""
        return self.getData()

    def getLastVersionFormattedData(self):
        """get the formatted data of the content object"""
        return self.getContentObject().getFormattedData()

    def getTextId(self):
        """returns P-number of text"""
        # assuming that it is the beginning of the title
        return self.title[:7]

    #security.declarePublic('history')
    def history(self):
        """history

        Uses an object with the id "history_template.html" found in the
        acquisition parent if there is one, the packaged template otherwise.
        """
        ext=self.ZopeFind(self.aq_parent,obj_ids=["history_template.html"])
        if ext:
            return getattr(self,ext[0][1].getId())()

        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','versionHistory')).__of__(self)
        return pt()

    def getBasketFromId(self,basketid, context=None):
        """get basket from id

        @param basketid: id of the basket
        @param context: (optional) object searched for baskets, defaults to self
        @return: the first CDLIBasket with this id, None if there is none
        """
        if not context:
            context=self

        for basket in self.ZopeFind(context,obj_metatypes=["CDLIBasket"]):
            if basket[0]==basketid:
                return basket[1]
        return None

    def isContainedInBaskets(self,context=None):
        """check if this file is part of any basket
        @param context: (optional) necessary if CDLIBasketCatalog is not an (inherited) attribute of self, context.CDLIBasketCatalog
                        has to exist.
        @return: list of the baskets whose last version contains this file
        """
        if not context:
            context=self

        ret=[]
        for x in context.CDLIBasketCatalog.search({'getFileNamesInLastVersion':self.getId()}):
            # if the basket x is deleted it sometimes seems to be still in the catalog, why?
            try:
                ret.append(x.getObject())
            except Exception:
                # skip catalog entries whose object cannot be loaded
                logging.debug("isContainedInBaskets: cannot load object of a catalog entry")
        return ret

    def _newContentObject(self, id, title='', versionNumber=0, versionComment=None, time=None, author=None):
        """factory for content objects. to be overridden in derived classes."""
        logging.debug("_newContentObject(CDLI)")
        return CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=versionComment,time=time,author=author)

    def addCDLIFileObjectForm(self):
        """add a new version

        @return: the add form, or a message if the user is anonymous or the
                 file is locked by somebody else
        """
        if str(self.REQUEST['AUTHENTICATED_USER']) in ["Anonymous User"]:
            return "please login first"
        if (self.lockedBy==self.REQUEST['AUTHENTICATED_USER']) or (self.lockedBy==''):
            out=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject',version=self.getVersion()).__of__(self)
            return out()
        else:
            return "Sorry file is locked by somebody else"

    def manage_addCDLIFileObject(self,id,vC,author,
                                 file='',title='',
                                 precondition='',
                                 content_type='',
                                 changeName='no',newName='',
                                 come_from=None,
                                 from_tmp=False,RESPONSE=None):
        """add a new content object (version) to this file

        @param vC: version comment; a 'vC' entry in the REQUEST takes precedence
        @param come_from: (optional) url to redirect to after a successful upload
        @param RESPONSE: if given the result is a redirect or the upload error
                         page, otherwise the new content object is returned
        """
        try: #TODO: the whole vC handling needs to be reworked
            vC=self.REQUEST['vC']
        except Exception:
            # no REQUEST or no 'vC' in it: keep the parameter value
            pass

        ob = self.addContentObject(id, vC, author, file, title, changeName=changeName, newName=newName, from_tmp=from_tmp,
                                   precondition=precondition, content_type=content_type)

        try:
            #FIXME: what is this good for?
            self.REQUEST.SESSION['objID_parent']=self.getId()
        except Exception:
            # best effort only, e.g. when there is no session
            pass

        #self.cdliRoot.updateOrAddToFileBTree(self)# now update the object in the cache

        if not RESPONSE:
            return ob

        if ob.getSize()==0:
            # nothing was uploaded: show the error page
            self.REQUEST.SESSION['objID']=ob.getId()
            pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','errorUploadFile')).__of__(self)
            return pt()

        if come_from and (come_from!=""):
            RESPONSE.redirect(come_from+"?change="+self.getId())
        else:
            RESPONSE.redirect(self.REQUEST['URL2']+'?uploaded=%s'%self.title)
def manage_addCDLIFileForm(self):
    """render the add form from the page template zpt/addCDLIFile.zpt"""
    templatePath=os.path.join(package_home(globals()),'zpt','addCDLIFile.zpt')
    form=PageTemplateFile(templatePath).__of__(self)
    return form()
def manage_addCDLIFile(self,id,title,lockedBy, author=None, RESPONSE=None):
    """add a new CDLIFile with the given id to self and index it

    @param lockedBy: passed to the CDLIFile constructor
    @param author: (optional) passed to the CDLIFile constructor
    @param RESPONSE: if given, the response is redirected to manage_main
    """
    newObj=CDLIFile(id,title,lockedBy,author)
    self._setObject(id,newObj)
    # index the object as it is stored in the container
    getattr(self,id).reindex_object()

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
def checkUTF8(data):
    """check if data is valid utf-8

    @param data: unicode text or a byte string
    @return: True if unicode text can be encoded as utf-8 or a byte string
             can be decoded as utf-8, False otherwise (also for objects that
             are neither)
    """
    try:
        if isinstance(data,type(u"")):
            data.encode('utf-8')
        else:
            # encode() on a byte string would first decode it as ASCII and
            # therefore reject every valid non-ASCII utf-8 sequence
            data.decode('utf-8')
        return True
    except Exception:
        return False
def checkFile(filename,data,folder):
    """check the file name and the encoding of an uploaded file

    @param filename: name of the file; the part before the first "." has to be
                     "P" followed by six further characters
    @param data: content of the file, checked with checkUTF8
    @param folder: not used
    @return: tuple (ok, message); message is empty if ok is True
    """
    # first check the file name (without extension)
    stem=filename.split(".")[0]

    # startswith also copes with an empty name, indexing the first character does not
    if not stem.startswith("P"):
        return False,"P missing in the filename"
    elif len(stem)!=7:
        return False,"P number has not the right length 6"
    elif not checkUTF8(data):
        return False,"not utf-8"
    else:
        return True,""
def splitatf(fh,dir=None,ext=None):
    """split a stream of ATF texts into one file per text

    Every line whose first non blank character is "&" starts a new file named
    after the text between "&" and "=" plus ".atf". Lines before the first
    such line are not written anywhere.

    @param fh: string with the complete text or an object with readlines()
               and close(); such an object is closed at the end
    @param dir: (optional) directory for the created files, defaults to the
                current directory
    @param ext: (optional) object whose attribute result is extended with
                progress marks ("." every 100 lines, "<br>" every 10000)
    @return: tuple (basket name found in a "#atf basket" or "#basket:" line
             or None, number of entries in the target directory)
    """
    basketName=None
    out=None
    counter=0
    textType=type(u"")

    isString=isinstance(fh,(type(""),textType))
    if isString:
        rawLines=fh.split("\n")
    else:
        rawLines=fh.readlines()

    try:
        for rawLine in rawLines:
            # make sure that all BOM are removed; the byte string BOM cannot
            # be used in replace() on unicode text
            if isinstance(rawLine,textType):
                rawLine=rawLine.replace(u"\ufeff",u"")
            else:
                rawLine=rawLine.replace(codecs.BOM_UTF8,'')

            #ROC: why split \n first and then \r???
            for line in rawLine.split("\r"):
                if ext:
                    counter+=1
                    if (counter%100)==0:
                        ext.result+="."
                    if counter==10000:
                        counter=0
                        ext.result+="<br>"

                #check if basket name is in the line
                if line.find("#atf basket")>=0: #old convention
                    basketName=line.replace('#atf basket ','').split('_')[0]
                elif line.find("#basket:")>=0: #new convention
                    basketName=line.replace('#basket: ','').split('_')[0]
                else:
                    stripped=line.lstrip()
                    if stripped and stripped[0]=="&": #new file
                        if out is not None:
                            out.close() #close last file
                        filename=line[1:].split("=")[0].rstrip()+".atf"
                        if dir:
                            filename=os.path.join(dir,filename)
                        if isinstance(line,textType):
                            # unicode text is stored as utf-8
                            out=codecs.open(filename,"w","utf-8")
                        else:
                            out=open(filename,"w")
                        logging.info("open %s"%filename)
                    if out is not None:
                        out.write(line.replace("\n","")+"\n")
    finally:
        # close the handles even if splitting fails half way
        if out is not None:
            out.close()
        if not isString:
            fh.close()

    return basketName,len(os.listdir(dir or os.curdir))
class CDLIFileFolder(extVersionedFileFolder):
    """CDLI File Folder"""

    security=ClassSecurityInfo()
    meta_type="CDLI Folder"
    file_meta_type=['CDLI file']
    folder_meta_type=['CDLI Folder']

    file_catalog='CDLICatalog'

    #downloadCounter=0 # counts how many downloads of all files are currently running, more than 5 are refused
    # NOTE(review): class level dict, i.e. one dict shared by all instances
    tmpStore2={}

    def _newVersionedFile(self, id, title='', lockedBy=None, author=None):
        """factory for versioned files. to be overridden in derived classes."""
        logging.debug("_newVersionedFile(CDLI)")
        return CDLIFile(id, title, lockedBy=lockedBy, author=author)

    def setTemp(self,name,value):
        """set the attribute name of this folder to value"""
        setattr(self,name,value)

    deleteFileForm = PageTemplateFile("zpt/doDeleteFile", globals())

    def delete(self,ids,REQUEST=None):
        """delete these files

        @param ids: a single id or a list (or tuple) of ids
        @param REQUEST: if given index_html is returned afterwards
        """
        if not isinstance(ids,(list,tuple)):
            ids=[ids]

        self.manage_delObjects(ids)

        if REQUEST is not None:
            return self.index_html()

    def getVersionNumbersFromIds(self,ids):
        """get the current versions of documents described by their ids

        @return: list of tuples (id, content object)
        """
        searchStr=" OR ".join(ids)
        founds=self.CDLICatalog.search({'title':searchStr})
        return [(found.getId,found.getObject().getContentObject()) for found in founds]

    def getFile(self,fn):
        """get the content of the file fn

        @return: the data of the content object, "" if fn is neither in this
                 folder nor found in the catalog
        """
        logging.debug("getFile: %s"%repr(fn))
        if self.hasObject(fn):
            obj=self[fn].getContentObject()
        else:
            # search deeper
            founds=getattr(self, self.file_catalog).search({'textid':fn})
            if not founds:
                return ""
            obj=founds[0].getObject().getContentObject()

        return obj.getData()[0:]

    def checkCatalog(self,fn):
        """check if fn is in the catalog"""
        #TODO add checkCatalog

    def findObjectsFromListWithVersion(self,list,author=None):
        """find objects from a list with versions
        @param list: list of tuples (cdliFile,version)
        """
        pt=getattr(self,'filelistVersioned.html')
        return pt(search=list,author=author)

    def getAllPNumbers(self):
        """get a list of all files (resp their p-numbers) stored"""
        return [x.getId for x in self.CDLICatalog()]

    def expandFile(self,fileId,fileTree):
        """fileId contains a wildcard: add the ids of all catalog hits to fileTree"""
        founds=self.CDLICatalog({'title':fileId})
        for found in founds:
            fileTree.add(found.getId)
            logging.debug("ADDD:"+found.getId)

    def findObjectsFromList(self,enterList=None,display=False,start=None,upload=None,list=None,basketName=None,numberOfObjects=None,RESPONSE=None,REQUEST=None,returnHash=False,hash=None):
        """findObjectsFromList (comma, TAB or LINE separated)

        The first matching case is handled:
        - upload / enterList: the text is split into ids, the ids are stored
          in the session and 'filelist.html' is rendered
        - basketName: 'filelist.html' is rendered for the basket
        - a result stored in CDLICache for hash: returned as (hash, result)
        - list: the ids are expanded and looked up; 'filelist.html' is
          rendered if display is set, otherwise the list of tuples
          (file, last version) is returned (as (hash, list) if returnHash)
        - start: redirect to filelist.html
        """
        logging.debug("start: findObjectsFromList")

        if upload or enterList:
            if upload: # list from file upload
                txt=upload.read()
            if enterList: # takes precedence over the upload
                txt=enterList

            for separator in (",","\t","\r"):
                txt=txt.replace(separator,"\n")
            # make sure that there are no empty lines
            ids=[part.strip() for part in txt.split("\n") if part.strip()]

            pt=getattr(self,'filelist.html')
            self.REQUEST.SESSION['searchList']=ids
            return pt(search=ids)

        if basketName:
            #TODO: get rid of one of these..
            pt=getattr(self,'filelist.html')
            return pt(basketName=basketName,numberOfObjects=numberOfObjects)

        result=self.CDLICache.retrieve(hash)
        if result:
            logging.debug("give result from storage2")
            return hash,result

        if list is not None: # got already a list
            logging.debug(" ----List version")
            fileTree=Set()

            for fileId in list:
                if fileId.find("*")>-1: #check for wildcards
                    self.expandFile(fileId,fileTree)
                elif len(fileId.split("."))==1:
                    fileId=fileId+".atf"
                # NOTE(review): the nesting of this statement could not be
                # recovered from the flattened source; here every requested
                # id is recorded, so ids that already carry an extension are
                # kept -- confirm against the original file
                fileTree.add(fileId)

            # only ids that are really stored
            ids=fileTree & self.v_file_ids
            l=makelist(fileTree)[0:]
            self.REQUEST.SESSION['fileIds']=l #store file ids in session for further usage
            self.REQUEST.SESSION['searchList']=l

            hash=md5.new(repr(makelist(fileTree))).hexdigest() # create hash as identification
            self.REQUEST.SESSION['hash']=hash
            #TODO: do I need garbage collection for v_tmpStore ?

            #TODO: get rid of one of these..
            ret=[(self.getFileObject(x),self.getFileObjectLastVersion(x)) for x in ids]

            if display:
                pt=getattr(self,'filelist.html')
                return pt(search=ids)

            self.CDLICache.store(hash,ret)
            if returnHash == True:
                return hash,ret
            return ret

        if start and RESPONSE is not None:
            RESPONSE.redirect("filelist.html?start:int="+str(start))

    security.declareProtected('Manage','createAllFilesAsSingleFile')
    def createAllFilesAsSingleFile(self,RESPONSE=None):
        """download all files

        Writes the data of all objects of meta type "CDLI file" found in the
        catalog, sorted by id, to RESPONSE. The number of running downloads
        is counted in temp_folder.downloadCounter; if it is larger than 5 a
        message is returned instead.

        @return: True, or the message if there are too many downloads
        """
        catalog=getattr(self,self.file_catalog)
        tempFolder=self.temp_folder

        if not hasattr(tempFolder,'downloadCounter'):
            tempFolder.downloadCounter=0

        if getattr(tempFolder,'downloadCounter',0) > 5:
            return """I am sorry, currently the server has to many requests for downloads, please come back later!"""

        tempFolder.downloadCounter+=1
        self._p_changed=1
        transaction.get().commit()

        try:
            entries=[(x.getId,x) for x in catalog()]
            entries.sort(key=lambda entry: entry[0])

            if RESPONSE:
                RESPONSE.setHeader("Content-Disposition","""attachment; filename=%s"""%"all.atf")
                RESPONSE.setHeader("Content-Type","application/octet-stream")

                for entry in entries:
                    obj=entry[1].getObject()
                    if obj.meta_type=="CDLI file":
                        RESPONSE.write(obj.getData()[0:])
                        RESPONSE.write("\n")
        finally:
            # give the download slot back even if the download fails
            tempFolder.downloadCounter-=1
            self._p_changed=1
            transaction.get().commit()

        return True

    def downloadFile(self,fn):
        """download fn - not used yet"""
        # NOTE(review): fn is opened as given; if this is reachable from a
        # request, any file readable by the server process can be fetched
        response=self.REQUEST.RESPONSE
        response.setHeader("Content-Disposition","""attachment; filename=%s"""%self.getLastVersion().getId())
        response.setHeader("Content-Type","application/octet-stream")
        fh=open(fn)
        try:
            response.write(fh.read())
        finally:
            fh.close()

    def hasParent(self):
        """returns True if this folder is a subfolder"""
        return self.aq_parent.meta_type in self.folder_meta_type

    def getFolders(self):
        """get all subfolders

        @return: list of tuples (folder, number of subfolders, number of files)
        """
        ret=[]
        folders=self.ZopeFind(self,obj_metatypes=self.folder_meta_type)
        for folder in folders:
            ret.append((folder[1],
                        len(self.ZopeFind(folder[1],obj_metatypes=self.folder_meta_type)),
                        len(self.ZopeFind(folder[1],obj_metatypes=self.file_meta_type))
                        ))
        return ret

    security.declareProtected('manage','index_html')
    def index_html(self):
        """main page: an object with the id "index.html" in this folder if
        there is one, the packaged template otherwise"""
        ext=self.ZopeFind(self,obj_ids=["index.html"])
        if ext:
            return ext[0][1]()

        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','CDLIFileFolderMain')).__of__(self)
        return pt()
# add form for CDLIFileFolder, rendered from the DTML file 'dtml/folderAdd'
manage_addCDLIFileFolderForm=DTMLFile('dtml/folderAdd', globals())
def manage_addCDLIFileFolder(self, id, title='',
                             createPublic=0,
                             createUserF=0,
                             REQUEST=None):
    """Add a new Folder object with id *id*.

    If the 'createUserF' parameter is set to any true value, a 'UserFolder'
    object is created in the new folder; this requires the permission
    'Add User Folders' on the new folder, otherwise Unauthorized is raised.

    'createPublic' is accepted but not used by this function.
    """
    ob=CDLIFileFolder()
    ob.id=str(id)
    ob.title=title
    self._setObject(id, ob)
    # continue with the object as it is stored in the container
    ob=self._getOb(id)

    checkPermission=getSecurityManager().checkPermission

    if createUserF:
        if not checkPermission('Add User Folders', ob):
            raise Unauthorized('You are not authorized to add User Folders.')
        ob.manage_addUserFolder()

    if REQUEST is not None:
        return self.manage_main(self, REQUEST, update_menu=1)
class CDLIRoot(Folder):
"""main folder for cdli"""
meta_type="CDLIRoot"
downloadCounterBaskets=0 # counts the current basket downloads if counter > 10 no downloads are possible
# id of the catalog; the methods below fetch it with getattr(self, self.file_catalog)
file_catalog = 'CDLICatalog'
# word splitter for search, keyed by index name; showWordInFile and
# tagWordInFile read the attributes bounds and ignorex of the selected splitter
splitter = {'words':cdliSplitter.wordSplitter(),
'graphemes':cdliSplitter.graphemeSplitter()}
def redirect(self,RESPONSE,url):
    """redirect to url with a time stamp appended in order to force a reload"""
    stamp=time.time()
    # use "&" if the url already has parameters
    if url.find("?")>-1:
        suffix="&time=%s"
    else:
        suffix="?time=%s"

    # everything before the first "-" of the formatted date, followed by GMT
    lastModified=email.Utils.formatdate().split("-")[0]+'GMT'
    RESPONSE.setHeader('Last-Modified',lastModified)
    logging.error(email.Utils.formatdate()+' GMT')
    RESPONSE.redirect(url+suffix%stamp)
def unicodify(self,txt):
    # method wrapper around the module level unicodify()
    # NOTE(review): the original has no docstring; adding one may make the
    # method publishable through the web -- confirm before documenting it
    converted=unicodify(txt)
    return converted
def invalidateOldCacheVersion(self):
    """delete the old version of the cache (attribute v_tmpStore)

    @return: always "done", also if there is no such attribute
    """
    try:
        del self.v_tmpStore
    except AttributeError:
        # nothing to delete on this object
        pass
    return "done"
def viewATF(self,id,RESPONSE):
    """redirect to the view of the last version of the file with the title id

    @return: always "not found"; if the catalog has a hit the response is
             redirected before
    """
    ob = self.CDLICatalog({'title':id})
    if len(ob)>0:
        # build the url only if there is a hit, ob[0] fails on an empty result
        url=ob[0].getObject().getLastVersion().absolute_url()+"/view"
        logging.debug(url)
        RESPONSE.redirect(url)
    return "not found"
def history(self,id,RESPONSE):
    """redirect to the history page of the first catalog hit for the title id;
    the return value is always "not found" """
    hits=self.CDLICatalog({'title':id})
    if len(hits)>0:
        target=hits[0].absolute_url+"/history"
        RESPONSE.redirect(target)
    return "not found"
def downloadLocked(self,id,RESPONSE):
    """redirect to downloadLocked of the first catalog hit for the title id;
    the return value is always "not found" """
    hits=self.CDLICatalog({'title':id})
    if len(hits)>0:
        first=hits[0]
        RESPONSE.redirect(first.absolute_url+"/downloadLocked")
    return "not found"
def download(self,id,RESPONSE):
    """redirect to download of the first catalog hit for the title id;
    the return value is always "not found" """
    hits=self.CDLICatalog({'title':id})
    if len(hits)>0:
        first=hits[0]
        logging.info("objekt:"+repr(first))
        RESPONSE.redirect(first.absolute_url+"/download")
    return "not found"
def addCDLIFileObjectForm(self,id,RESPONSE):
    """redirect to addCDLIFileObjectForm of the first catalog hit for the
    title id; the return value is always "not found" """
    hits=self.CDLICatalog({'title':id})
    if len(hits)>0:
        target=hits[0].absolute_url+"/addCDLIFileObjectForm"
        RESPONSE.redirect(target)
    return "not found"
def addVersionedFileObjectForm(self,id,RESPONSE):
    """redirect to addVersionedFileObjectForm of the first catalog hit for
    the title id; the return value is always "not found" """
    hits=self.CDLICatalog({'title':id})
    if len(hits)>0:
        first=hits[0]
        RESPONSE.redirect(first.absolute_url+"/addVersionedFileObjectForm")
    return "not found"
def unlock(self,id,RESPONSE):
    """redirect to unlock of the first catalog hit for the title id;
    the return value is always "not found" """
    hits=self.CDLICatalog({'title':id})
    if len(hits)>0:
        target=hits[0].absolute_url+"/unlock"
        RESPONSE.redirect(target)
    return "not found"
def getFileObject(self,fileId):
    """get the file stored in v_files under fileId (None if there is none)"""
    # support for old baskets: a CDLIFileObject is handed back unchanged
    if not isinstance(fileId,CDLIFileObject):
        return self.v_files.get(fileId)
    return fileId
def getFileObjectLastVersion(self,fileId):
    """get the entry stored in v_files_lastVersion under fileId (None if there is none)"""
    return self.v_files_lastVersion.get(fileId)
def showFileIds(self):
    """return the set of file ids kept in v_file_ids"""
    fileIds=self.v_file_ids
    return fileIds
def generateFileBTree(self):
    """build the BTrees of all files from CDLICatalog

    Fills v_files (id -> object), v_files_lastVersion (id -> last version)
    and v_file_ids (set of ids) from scratch.

    @return: always True
    """
    self.v_files = OOBTree()
    self.v_files_lastVersion = OOBTree()
    self.v_file_ids = Set()

    for x in self.CDLICatalog.searchResults():
        fileId=x.getId
        # resolve the catalog entry only once per file
        obj=x.getObject()
        self.v_files[fileId]=obj
        self.v_files_lastVersion[fileId]=obj.getLastVersion()
        self.v_file_ids.add(fileId)
        logging.debug("add:"+fileId+"XXX"+repr(obj))

    return True
def updateOrAddToFileBTree(self,obj):
    """store obj and its last version under its id in the file BTrees"""
    key=obj.getId()
    self.v_files.update({key:obj})
    self.v_files_lastVersion.update({obj.getId():obj.getLastVersion()})
    self.v_file_ids.add(obj.getId())
    logging.debug("update:"+obj.getId()+"XXX"+repr(obj))
def deleteFromBTree(self,objId):
    """remove the entries for objId from the file BTrees and the id set"""
    for tree in (self.v_files,self.v_files_lastVersion):
        tree.pop(objId)
    self.v_file_ids.remove(objId)
def deleteFiles(self,ids):
    """delete the files with these ids from the folders they are stored in"""
    catalog=self.CDLICatalog
    for id in ids:
        # the catalog is searched for the part of the id before the first "."
        founds=catalog.search({'title':id.split(".")[0]})
        if not founds:
            continue
        logging.debug("deleting %s"%founds)
        first=founds[0]
        folder=first.getObject().aq_parent #get the parent folder of the object
        logging.debug("deleting from %s"%folder)
        folder.delete([first.getId]) #cut it out
def searchText(self, query, index='graphemes'):
    """searches query in the fulltext index and returns a list of file ids/P-numbers"""
    # see also: http://www.plope.com/Books/2_7Edition/SearchingZCatalog.stx#2-13
    logging.debug("searchtext for '%s' in index %s"%(query,index))
    catalog = getattr(self, self.file_catalog)
    # do search
    hits = catalog.search(query_request={index:{'query':query}},sort_index='textid')
    # put only the P-Number (first seven characters of the id) in the result
    pnums = []
    for hit in hits:
        pnums.append(hit.getId[:7])
    logging.debug("searchtext: found %d texts"%len(pnums))
    return pnums
def getFile(self, pnum):
    """get the data of the translit file with the given pnum ("" if it is not in the catalog)"""
    catalog = getattr(self, self.file_catalog)
    hits = catalog.search({'textid':pnum})
    if hits:
        return hits[0].getObject().getData()
    return ""
def showFile(self,fileId,wholePage=False):
    """show a file
    @param fileId: P-Number of the document to be displayed
    @param wholePage: if true the view of the content object is returned,
                      otherwise the formatted data of the last version
    @return: "" if the file is not in the catalog
    """
    hits=getattr(self, self.file_catalog).search({'textid':fileId})
    if not hits:
        return ""

    obj=hits[0].getObject()
    if not wholePage:
        return obj.getLastVersionFormattedData()

    logging.debug("show whole page")
    return obj.getContentObject().view()
def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,):
    """get the lines of the file fileId that contain one of the blank
    separated terms of word, formatted with formatAtfLineHtml"""
    logging.debug("showwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId))

    text = formatAtfFullLineNum(self.getFile(fileId))

    # add whitespace before and whitespace and line-end to splitter bounds expressions
    bounds = self.splitter[indexName].bounds
    template = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds)

    # clean word expression: take out double quotes and ignorable signs
    # TODO: this should use QueryParser itself
    ignorable = self.splitter[indexName].ignorex
    cleaned = ignorable.sub('', word.replace('"',''))

    # compile into regexp objects and escape parens
    patterns = [re.compile(template%re.escape(term)) for term in cleaned.split(' ')]

    matches=[]
    for line in text.splitlines():
        # the terms are searched in the line without ignorable signs
        bare = ignorable.sub('',line)
        for pattern in patterns:
            if pattern.search(bare):
                matches.append(formatAtfLineHtml(line))
                break

    return matches
def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
    """
    get lines with word from all ids in list FileIds.
    returns dict with id:lines pairs.
    """
    logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))
    linesById={}
    for id in fileIds:
        linesById[id]=self.showWordInFile(id, word, indexName, regExp)
    return linesById
def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False):
    """get the text of the file fileId as html with the hits for word
    wrapped in <span class="found">; the lines are joined with <br>"""
    logging.debug("tagwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId))

    text=self.getFile(fileId)
    openTag=u'<span class="found">'
    closeTag=u'</span>'

    # add whitespace to splitter bounds expressions and compile into regexp object
    splitter = self.splitter[indexName]
    partSplitter = re.compile("(%s|\s)"%splitter.bounds)

    # clean word expression: take out double quotes and ignorable signs
    # TODO: this should use QueryParser itself
    ignorable = self.splitter[indexName].ignorex
    cleaned = ignorable.sub('', word.replace('"',''))
    # split search terms by blanks
    terms = cleaned.split(' ')
    # split search terms again (for grapheme search with words)
    termParts = dict(((t,self.splitter[indexName].process([t])) for t in terms))

    htmlLines=[]
    for line in text.splitlines():
        line = unicodify(line)
        # ignore lemma lines, the p-num line, version lines and atf type lines
        if (line.lstrip().startswith('#lem:') or line.startswith('&P')
                or line.startswith('#version') or line.startswith('#atf:')):
            continue

        # first scan: collect the split parts of every term found in the line
        hits = []
        for t in terms:
            if ignorable.sub('',line).find(t) > -1:
                hits.extend(termParts[t])

        if not hits:
            # no hits
            htmlLines.append(formatAtfHtml(line))
            continue

        # examine hits closer: split line into parts and reassemble it
        pieces = []
        for part in partSplitter.split(line):
            if ignorable.sub('', part) in hits:
                # this part was found
                pieces.append(openTag + formatAtfHtml(part) + closeTag)
            else:
                pieces.append(formatAtfHtml(part))
        htmlLines.append("".join(pieces))

    return u'<br>\n'.join(htmlLines)
def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
    """
    get texts with highlighted word from all ids in list FileIds.
    returns dict with id:text pairs.
    """
    logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))
    textById={}
    for id in fileIds:
        textById[id]=self.tagWordInFile(id, word, indexName, regExp)
    return textById
def getFileVersionList(self, pnum):
    """get the version history as a list for the translit file with the given pnum
    (empty list if the file is not in the catalog)"""
    hits = getattr(self, self.file_catalog).search({'textid':pnum})
    if hits:
        return hits[0].getObject().getVersionList()
    return []
def URLquote(self,str):
"""quote url"""
return urllib.quote(str)
def URLunquote(self,str):
"""unquote url"""
return urllib.unquote(str)
def URLquote_plus(self,str):
"""quote url"""
return urllib.quote_plus(str)
def URLunquote_plus(self,str):
"""unquote url"""
return urllib.unquote_plus(str)
def changeUserForPObjectFromFile(self,user,fname):
"""change the author of the content object of every text listed in a file

@param user: value written to the author attribute
@param fname: name of a file in the directory 'inputs' of this package;
every line, stripped and extended by ".atf", is used as file id
"""
# NOTE(review): the file handle is never closed in this method
pns = file(os.path.join(package_home(globals()),'inputs',fname),"r")
for p in pns.readlines():
p=p.lstrip().rstrip()
logging.info(str(p+".atf"))
pObj=self.getFileObject(p+".atf")
# ids without an entry in the file BTree are skipped
if pObj is not None:
logging.info(pObj)
lv=pObj.getContentObject()
logging.info("author:"+lv.author)
lv.author=user
# NOTE(review): the version comment is overwritten with a placeholder
lv.versionComment="XXXXXXX"
# NOTE(review): the bare name `gen` is not defined in this method; evaluating
# it raises NameError unless a global of that name exists elsewhere -- looks
# like a leftover, confirm before relying on this method
gen
def forceunlock(self,REQUEST=None,user=None):
    """break all locks

    Calls forceunlock(user=user) on every object of meta type "CDLI file"
    below self.

    @param REQUEST: if given the result is rendered with breakLockResponse.zpt
    @param user: passed on to forceunlock of the files
    @return: list of tuples (id, result of forceunlock) for all files where
             the result is neither empty nor None
    """
    ret=[]

    # the meta types have to be a list; with a plain string the meta type
    # test would be a substring match
    for f in self.ZopeFind(self,obj_metatypes=["CDLI file"],search_sub=1):
        un=f[1].forceunlock(user=user)
        logging.info("check:"+f[0])
        if un and un !="":
            ret.append((f[0],un))

    if REQUEST is not None:
        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','breakLockResponse.zpt')).__of__(self)
        return pt(ret=ret)

    return ret
def getLockedFiles(self,REQUEST=None,user=None):
    """get all locked files

    @param REQUEST: if given the result is rendered with showlockResponse.zpt
    @param user: (optional) only files locked by this user are collected
    @return: dict that maps the lockedBy value to the list of ids of the
             files of meta type "CDLI file" below self with this lock
    """
    ret={}

    # the meta types have to be a list; with a plain string the meta type
    # test would be a substring match
    for f in self.ZopeFind(self,obj_metatypes=["CDLI file"],search_sub=1):
        lb = f[1].lockedBy
        if (lb is None) or (lb==""):
            # not locked
            continue
        if (user is not None) and (str(lb)!=user):
            # locked by somebody else
            continue
        ret.setdefault(lb,[]).append(f[0])

    if REQUEST is not None:
        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','showlockResponse.zpt')).__of__(self)
        return pt(ret=ret)

    return ret
def getChangesByAuthor(self,author,n=100):
    """get at most n entries of CDLIObjectsCatalog with the lastEditor author,
    sorted by getTime in descending order"""
    query={'lastEditor':author,
           'sort_on':'getTime',
           'sort_order':'descending',
           'sort_limit':n}
    found=self.CDLIObjectsCatalog(query)
    return found[:n]
def getChangesByAuthor_html(self,author,n=100):
    """html output for changes by author

    For every file only the highest version number among the changes is kept.

    @param author: the editor whose changes are shown
    @param n: maximum number of changes taken into account
    @return: the result of cdli_main.findObjectsFromListWithVersion
    """
    newest={}
    # n is passed on; the original always used the default of getChangesByAuthor
    for x in self.getChangesByAuthor(author,n):
        version=x.getObject()
        nr=version.getVersionNumber()
        parent=version.aq_parent
        id=parent.getId()
        # add it if this version is newer than the one already stored
        if newest.get(id,(0,0))[1] < nr:
            newest[id]=(parent,nr)

    return self.cdli_main.findObjectsFromListWithVersion(list=newest.values(),author=author)
def getLastChanges(self,n=100):
    """get the last n changes: entries of CDLICatalog sorted by
    getLastChangeDate in descending order"""
    limit=int(n)
    query={'sort_on':'getLastChangeDate',
           'sort_order':'descending',
           'sort_limit':limit}
    found=self.CDLICatalog(query)
    return found[:limit]
def getLastChanges_html(self,n=100):
    """display the files of the last n changes with cdli_main.findObjectsFromList"""
    ids=[]
    for change in self.getLastChanges(n):
        ids.append(change.getId)
    return self.cdli_main.findObjectsFromList(list=ids,display=True)
def refreshTxt(self,txt="",threadName=None):
    """text for a refresh: the url of self plus txt with the thread name as parameter repeat"""
    target=self.absolute_url()+txt
    return """ 2;url=%s?repeat=%s """%(target,threadName)
def refreshTxtBasket(self,txt="",threadName=None):
    """text for a refresh: txt is used as url with the thread name as parameter repeat"""
    values=(txt,threadName)
    return """ 2;url=%s?repeat=%s """%values
def getResult(self,threadName=None):
    """result of the upload thread with the name threadName

    @return: the result of getResult() of the thread, "One moment, please"
             if the thread is not known or its result cannot be read
    """
    try:
        return self._v_uploadATF[threadName].getResult()
    except Exception:
        # deliberately best effort: the waiting page keeps polling
        return "One moment, please"
def checkThreads(self):
"""check threads"""
ret="<html><body>"
for thread in threading.enumerate():
ret+="<p>%s (%s): %s</p>"%(repr(thread),thread.getName(),thread.isAlive())
return ret
def uploadATFRPC(self,data,username):
    """upload an atffile via xml-rpc

    Runs an uploadATFThread and afterwards an uploadATFfinallyThread with the
    procedure "uploadchanged" in the calling thread and returns the result of
    generateXMLReturn for the return value of the upload.
    """
    from random import randint

    uploader=uploadATFThread()

    #generate a random id for the upload object, an id in the session is reused
    session=self.REQUEST.SESSION
    if session.get('idTmp',None):
        idTmp=session.get('idTmp',None)
    else:
        idTmp=str(randint(0,1000000000))
        session['idTmp']=idTmp

    uploader.set(data,0,username,idTmp)
    stObj=uploader.run()
    uploadResult=stObj.returnValue

    processor=uploadATFfinallyThread()
    basketname=uploadResult['basketNameFromFile']
    processor.set("uploadchanged",basketname=basketname,SESSION=stObj.returnValue,username=username,serverport=self.REQUEST['SERVER_PORT'])
    processor.run()

    return generateXMLReturn(stObj.returnValue)
def uploadATF(self,repeat=None,upload=None,basketId=0,RESPONSE=None):
    """upload an atf file / basket file

    Without repeat a new upload thread is started and the waiting page is
    returned. With repeat (the name of the thread, sent by the waiting page)
    the waiting page is returned again as long as the thread has no return
    value, afterwards the check page for the upload is rendered.

    @param repeat: (optional) name of a running upload thread
    @param upload: the uploaded file, handed to the upload thread
    @param basketId: (optional) handed to the upload thread
    """
    from random import randint

    def showWaitPage(name):
        # an object with the id 'wait_template' found in the parent is used
        # if there is one, the packaged template otherwise
        wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template'])
        if wait_template:
            return wait_template[0][1]()
        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadATFWait.zpt')).__of__(self)
        return pt(txt='/uploadATF',threadName=name)

    #generate a random id for the upload thread, an id in the session is reused
    session=self.REQUEST.SESSION
    if not session.get('idTmp',None):
        idTmp=str(randint(0,1000000000))
        session['idTmp']=idTmp
    else:
        idTmp=session.get('idTmp',None)

    threadName=repeat
    if not threadName:
        #new thread, not called from the waiting page
        thread=uploadATFThread()
        threadName=thread.getName()[0:]

        if (not hasattr(self,'_v_uploadATF')):
            self._v_uploadATF={}
        self._v_uploadATF[threadName]=thread

        logging.info("set thread. extern")
        thread.set(upload,basketId,self.REQUEST['AUTHENTICATED_USER'],idTmp,serverport=self.REQUEST['SERVER_PORT'])
        logging.info("start thread. extern")
        thread.start()

        self.threadName=thread.getName()[0:]
        return showWaitPage(threadName)

    #recover thread, if lost
    if (not hasattr(self,'_v_uploadATF')):
        self._v_uploadATF={}
    if not self._v_uploadATF.get(threadName,None):
        for thread in threading.enumerate():
            if threadName == thread.getName():
                self._v_uploadATF[threadName]=thread

    if self._v_uploadATF.get(threadName,None) and (not self._v_uploadATF[threadName].returnValue):
        # still running
        return showWaitPage(threadName)

    # the result is read from the object stored in temp_folder under the upload id
    tmp=getattr(self.temp_folder,idTmp).returnValue
    pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadCheck.zpt')).__of__(self)
    return pt(changed=tmp['changed'],lockerrors=tmp['lockerrors'],errors=tmp['errors'],dir=tmp['dir'],newPs=tmp['newPs'],basketLen=tmp['basketLen'],numberOfFiles=tmp['numberOfFiles'],
              basketNameFromId=tmp['basketNameFromId'],basketNameFromFile=tmp['basketNameFromFile'],basketId=tmp['basketId'])
def redoUpload(self,threadName):
    """Render the upload check page again for a stored upload result.

    The result dictionary is read from ``self.cdli_main.tmpStore2`` under
    the key *threadName* and handed to the ``uploadCheck.zpt`` template.
    """
    tmp = self.cdli_main.tmpStore2[threadName]
    template_path = os.path.join(package_home(globals()), 'zpt', 'uploadCheck.zpt')
    pt = PageTemplateFile(template_path).__of__(self)
    keys = ('changed', 'lockerrors', 'errors', 'dir', 'newPs', 'basketLen',
            'numberOfFiles', 'basketNameFromId', 'basketNameFromFile', 'basketId')
    return pt(**dict([(key, tmp[key]) for key in keys]))
def uploadATFfinally(self,procedure='',comment="",basketname='',unlock=None,repeat=None,RESPONSE=None):
    """Now upload the files: run the final upload step in a background thread.

    Without *repeat* a new ``uploadATFfinallyThread`` is configured with the
    result stored in ``temp_folder`` under the session's ``idTmp``, started,
    and a waiting page is returned.

    With *repeat* (the thread name passed back by the waiting page) the
    waiting page is returned as long as the thread exists and its ``end``
    flag is false.  Afterwards the session's ``idTmp`` is reset to None,
    every entry of the result's ``'changed'`` list is looked up in
    ``CDLICatalog`` by title and pushed to
    ``cdliRoot.updateOrAddToFileBTree``, and, if *RESPONSE* is given, the
    browser is redirected to this object's URL.
    """
    def waitPage(name):
        # A 'wait_template' found via the parent wins over the packaged page.
        wait_template = self.aq_parent.ZopeFind(self.aq_parent, obj_ids=['wait_template'])
        if wait_template:
            return wait_template[0][1]()
        pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'uploadATFWait.zpt')).__of__(self)
        return pt(txt='/uploadATFfinally', threadName=name)

    threadName = repeat
    if not threadName:
        # First call: create, register, configure and start the worker.
        thread = uploadATFfinallyThread()
        threadName = thread.getName()[0:]

        if not hasattr(self, '_v_uploadATF'):
            self._v_uploadATF = {}
        self._v_uploadATF[threadName] = thread

        idTmp = self.REQUEST.SESSION['idTmp']
        stObj = getattr(self.temp_folder, idTmp)
        thread.set(procedure, comment=comment, basketname=basketname, unlock=unlock,
                   SESSION=stObj.returnValue,
                   username=self.REQUEST['AUTHENTICATED_USER'],
                   serverport=self.REQUEST['SERVER_PORT'])
        thread.start()
        return waitPage(threadName)

    # Repeated call: recover the thread by name if the registry lost it.
    if not hasattr(self, '_v_uploadATF'):
        self._v_uploadATF = {}
    if not self._v_uploadATF.get(threadName, None):
        for candidate in threading.enumerate():
            if threadName == candidate.getName():
                self._v_uploadATF[threadName] = candidate

    thread = self._v_uploadATF.get(threadName, None)
    if thread and not thread.end:
        # Worker has not finished yet: keep waiting.
        return waitPage(threadName)

    idTmp = self.REQUEST.SESSION['idTmp']
    stObj = getattr(self.temp_folder, idTmp)
    self.REQUEST.SESSION['idTmp'] = None

    # update changed
    logging.debug("dir:" + repr(stObj.returnValue['changed']))
    for x in stObj.returnValue['changed']:
        ob = self.CDLICatalog.search({'title': x[0]})
        try:
            self.cdliRoot.updateOrAddToFileBTree(ob[0].getObject())
        except Exception:
            # Report the title that was searched for.  The former message
            # formatted ob[1], which itself raises IndexError whenever the
            # search yields fewer than two hits -- exactly the case in
            # which ob[0] fails -- so the handler used to crash.
            logging.error("uploadATFfinally - cannot update Object %s Error: %s %s" % (x[0], sys.exc_info()[0], sys.exc_info()[1]))

    if RESPONSE is not None:
        RESPONSE.redirect(self.absolute_url())
def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None):
    """Import files from *folderName* into the two-level folder structure.

    For every file name ``f`` the folder ``f[0:3]`` below ``self.cdli_main``
    and the sub folder ``f[0:5]`` inside it are looked up and created when
    missing.  The file is then added to the sub folder and registered via
    ``self.cdliRoot.updateOrAddToFileBTree``.  The transaction is committed
    after every 100 files and once more at the end.

    comment    -- accepted but not used; files are added with an empty
                  version comment (NOTE(review): possibly an oversight).
    author     -- author recorded for every added file.
    folderName -- directory on the server's file system holding the files.
    files      -- file names to import; when empty, all entries of
                  *folderName* are imported.
    ext        -- optional object whose ``result`` attribute is prepended
                  with a progress line per file.

    Returns the string "ok".
    """
    logging.debug("importFiles folderName=%s files=%s ext=%s"%(folderName,files,ext))
    root = self.cdli_main
    count = 0
    if not files:
        files = os.listdir(folderName)

    for f in files:
        folder = f[0:3]
        f2 = f[0:5]

        # check if main folder PXX already exists
        obj = self.ZopeFind(root, obj_ids=[folder])
        logging.debug("importFiles: folder=%s f2=%s obj=%s"%(folder,f2,obj))
        if ext:
            ext.result = ("<p>adding: %s </p>"%f) + ext.result

        if not obj:
            # if not, create it
            manage_addCDLIFileFolder(root, folder, folder)
            fobj = getattr(root, folder)
        else:
            fobj = obj[0][1]

        # check if PYYYYY already exists
        obj2 = fobj.ZopeFind(fobj, obj_ids=[f2])
        logging.debug("importFiles: fobj=%s obj2=%s"%(fobj,obj2))

        if not obj2:
            # if not, create it
            manage_addCDLIFileFolder(fobj, f2, f2)
            fobj2 = getattr(fobj, f2)
        else:
            fobj2 = obj2[0][1]

        # now add the file
        file2 = os.path.join(folderName, f)
        logging.debug("importFiles: addCDLIFile fobj2=%s, f=%s file2=%s"%(fobj2,repr(f),repr(file2)))
        # Close the handle explicitly; it used to be leaked for every file.
        # Assumes addFile consumes the data during the call -- TODO confirm.
        source = open(file2)
        try:
            fobj2.addFile(vC='', file=source, author=author, newName=f)
        finally:
            source.close()
        count += 1

        # now add the file to the storage
        ob = getattr(fobj2, f)
        self.cdliRoot.updateOrAddToFileBTree(ob)

        if count%100 == 0:
            logging.debug("importfiles: committing")
            transaction.get().commit()

    transaction.get().commit()
    return "ok"
# DTML template loaded from 'dtml/rootAdd'; judging by its name it is the
# add form that precedes manage_addCDLIRoot -- confirm against the product
# registration.
manage_addCDLIRootForm=DTMLFile('dtml/rootAdd', globals())
def manage_addCDLIRoot(self, id, title='',
                     createPublic=0,
                     createUserF=0,
                     REQUEST=None):
    """Add a new CDLIRoot object with id *id*.

    If 'createUserF' is set to a true value, a 'UserFolder' object is
    created in the new object; this requires the 'Add User Folders'
    permission, otherwise Unauthorized is raised.

    'createPublic' is accepted for signature compatibility but is not
    evaluated by this function.

    If REQUEST is given, the container's management view is returned.
    """
    ob = CDLIRoot()
    ob.id = str(id)
    ob.title = title
    try:
        self._setObject(id, ob)
    except Exception:
        # Still best effort (the object is fetched by id right below), but
        # leave a trace instead of hiding the failure completely.
        logging.warning("manage_addCDLIRoot: _setObject(%r) failed: %s %s" % (id, sys.exc_info()[0], sys.exc_info()[1]))
    ob = self._getOb(id)

    checkPermission = getSecurityManager().checkPermission

    if createUserF:
        if not checkPermission('Add User Folders', ob):
            raise Unauthorized('You are not authorized to add User Folders.')
        ob.manage_addUserFolder()

    if REQUEST is not None:
        return self.manage_main(self, REQUEST, update_menu=1)
import cdli_basket
# The following classes should no longer be used from this package; import
# them directly from cdli_basket instead.
class uploadATFfinallyThread(cdli_basket.uploadATFfinallyThread):
    """Deprecated alias; use cdli_basket.uploadATFfinallyThread directly."""
class tmpStore(cdli_basket.tmpStore):
    """Deprecated alias; use cdli_basket.tmpStore directly."""
class uploadATFThread(cdli_basket.uploadATFThread):
    """Deprecated alias; use cdli_basket.uploadATFThread directly."""
class CDLIBasketContainer(cdli_basket.CDLIBasketContainer):
    """Deprecated alias; use cdli_basket.CDLIBasketContainer directly."""
class CDLIBasket(cdli_basket.CDLIBasket):
    """Deprecated alias; use cdli_basket.CDLIBasket directly."""
class CDLIBasketVersion(cdli_basket.CDLIBasketVersion):
    """Deprecated alias; use cdli_basket.CDLIBasketVersion directly."""
class BasketContent(cdli_basket.BasketContent):
    """Deprecated alias; use cdli_basket.BasketContent directly."""
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>