--- cdli/cdli_files.py 2007/03/23 13:58:10 1.73
+++ cdli/cdli_files.py 2007/10/19 16:25:06 1.80.2.2
@@ -26,6 +26,30 @@ import logging
import transaction
import copy
import codecs
+import sys
+
+def unicodify(s):
+    """Decode a str (UTF-8, falling back to Latin-1) into a unicode object.
+    Falsy input yields u""; non-str input is returned unchanged."""
+    if not s:
+        return u""
+    if not isinstance(s, str):
+        return s
+    try:
+        return s.decode('utf-8')
+    except UnicodeDecodeError: # not valid UTF-8, so treat the bytes as Latin-1
+        return s.decode('latin-1')
+
+def utf8ify(s):
+    """Encode a unicode object into a UTF-8 byte string.
+    Falsy input yields ""; a str is assumed to be UTF-8 already and is
+    handed back as is."""
+    if not s:
+        return ""
+    if not isinstance(s, str):
+        return s.encode('utf-8')
+    return s
+
def generateXMLReturn(hash):
"""erzeugt das xml file als returnwert fuer uploadATFRPC"""
@@ -132,7 +156,7 @@ def unique(s):
class BasketContent(SimpleItem):
"""classe fuer den Inhalt eines Baskets"""
-
+
def __init__(self,content=[]):
"""content"""
self.contentList=content[0:]
@@ -530,6 +554,7 @@ class CDLIBasketContainer(OrderedFolder)
return ret
+ security.declareProtected('manage','getBasketAsOneFile')
def getBasketAsOneFile(self,basketName,current="no"):
"""returns all files of the basket combined in one file
@param basketName: Name of the basket
@@ -553,6 +578,7 @@ class CDLIBasketContainer(OrderedFolder)
ret+=str(founds[0].getObject().getLastVersion().getData())+"\n"
return ret
+ security.declareProtected('manage','upDateBaskets')
def upDateBaskets(self):
"""update content in to objects"""
@@ -623,7 +649,7 @@ class CDLIBasketContainer(OrderedFolder)
return pt(basketId=basketId,basketName=basketName)
- security.declareProtected('View','index_html')
+ security.declareProtected('manage','index_html')
def index_html(self):
"""stanadard ansicht"""
@@ -827,7 +853,7 @@ class CDLIBasketContainer(OrderedFolder)
"""get name of the actualuser"""
return str(self.REQUEST['AUTHENTICATED_USER'])
-
+ security.declareProtected('manage','addBasket')
def addBasket(self,newBasketName):
"""add a new basket"""
@@ -883,7 +909,43 @@ class CDLIBasket(Folder,CatalogAware):
meta_type="CDLIBasket"
default_catalog="CDLIBasketCatalog"
+    def searchInBasket(self,indexName,searchStr,regExp=False):
+        """Search the files of the last basket version through a line index
+        and render the hits with the 'searchResultsInBasket' page template.
+        @param indexName: name of the index to be searched in
+        @param searchStr: word to be searched
+        @param regExp: (optional) handed on to the index lookups
+        """
+        hitDocs=self.searchInLineIndexDocs(indexName,searchStr,uniq=True,regExp=regExp) #TODO: fix this
+        result={}
+        for entry in self.getLastVersion().content.getContent():
+            docId=entry[1].getId().split(".")[0] # file id up to the first "."
+            if docId not in hitDocs:
+                continue
+            lines=self.getLinesFromIndex(indexName,searchStr,docId,regExp=regExp)
+            result[docId]=self.showWordInFile(docId,searchStr,lineList=lines,regExp=regExp,indexName=indexName)
+        template=PageTemplateFile(os.path.join(package_home(globals()),'zpt','searchResultsInBasket')).__of__(self)
+        return template(result=result,indexName=indexName,regExp=regExp,word=searchStr)
+
+
+
+
+    def searchInBasket_v1(self,searchStr):
+        """Scan the files of the last basket version line by line and return,
+        joined by newlines, every line that searchStr matches (re.match, i.e.
+        anchored at the start of the line)."""
+        hits=[]
+        version=self.getLastVersion()
+        logging.info("searching")
+        for entry in version.content.getContent():
+            for line in entry[0].getData().split("\n"):
+                logging.info("search %s"%line)
+                if re.match(searchStr,line):
+                    hits.append(line)
+        return "\n".join(hits)
+
+
def getFile(self,obj):
return obj[1]
@@ -1204,7 +1266,7 @@ class CDLIBasketVersion(Implicit,Persist
self.REQUEST.RESPONSE.setHeader("Content-Length",length)
self.REQUEST.RESPONSE.write(ret)
- security.declareProtected('View','downloadObjectsAsOneFile')
+ security.declareProtected('manage','downloadObjectsAsOneFile')
def downloadObjectsAsOneFile(self,lock=None,procedure=None,REQUEST=None,check="yes",current="no"):
"""download all selected files in one file"""
@@ -1322,7 +1384,7 @@ class CDLIBasketVersion(Implicit,Persist
def __init__(self,id,user,comment="",basketContent=[]):
""" init a basket version"""
self.id=id
- self.coment=comment
+ self.comment=comment
self._setObject('content',BasketContent(basketContent))
#self.basketContent=basketContent[0:]a
self.user=user
@@ -1336,7 +1398,7 @@ class CDLIBasketVersion(Implicit,Persist
"""get Comment"""
return self.comment
- security.declareProtected('View','index_html')
+ security.declareProtected('manage','index_html')
def index_html(self):
"""view the basket"""
@@ -1388,9 +1450,8 @@ class CDLIFileObject(CatalogAware,extVer
security=ClassSecurityInfo()
-
- security.declarePublic('makeThisVersionCurrent')
-
+ security.declareProtected('manage','index_html')
+
def PrincipiaSearchSource(self):
"""Return cataloguable key for ourselves."""
return str(self)
@@ -1400,28 +1461,26 @@ class CDLIFileObject(CatalogAware,extVer
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','makeThisVersionCurrent.zpt')).__of__(self)
return pt()
+
+ security.declarePublic('makeThisVersionCurrent')
def makeThisVersionCurrent(self,comment,author,RESPONSE=None):
"""copy this version to current"""
parent=self.aq_parent
-
-
newversion=parent.manage_addCDLIFileObject('',comment,author)
newversion.manage_upload(self.getData())
if RESPONSE is not None:
RESPONSE.redirect(self.aq_parent.absolute_url()+'/history')
-
return True
- security.declarePublic('view')
-
def getFormattedData(self):
"""fromat text"""
data=self.getData()
# return re.sub("\s\#lem"," #lem",data) #remove return vor #lem
return re.sub("#lem"," #lem",data) #remove return vor #lem
+ security.declarePublic('view')
def view(self):
"""view file"""
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self)
@@ -1457,10 +1516,10 @@ class CDLIFileObject(CatalogAware,extVer
manage_addCDLIFileObjectForm=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject', version='1')
-def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',precondition='', content_type='',
+def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',versionNumber=0,
+ precondition='', content_type='',
from_tmp=False,REQUEST=None):
"""Add a new File object.
-
Creates a new File object 'id' with the contents of 'file'"""
id=str(id)
@@ -1473,47 +1532,50 @@ def manage_addCDLIFileObject(self,id,vC=
self=self.this()
# First, we create the file without data:
- self._setObject(id, CDLIFileObject(id,title,'',content_type, precondition))
- self._getOb(id).versionComment=str(vC)
- self._getOb(id).time=time.localtime()
-
- setattr(self._getOb(id),'author',author)
-
+ self._setObject(id, CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=vC,time=time.localtime(),author=author))
+ fob = self._getOb(id)
# Now we "upload" the data. By doing this in two steps, we
# can use a database trick to make the upload more efficient.
if file and not from_tmp:
- self._getOb(id).manage_upload(file)
+ fob.manage_upload(file)
elif file and from_tmp:
- self._getOb(id).manage_upload_from_tmp(file)
+ fob.manage_file_upload(file) # manage_upload_from_tmp doesn't exist in ExtFile2
+ # fob.manage_upload_from_tmp(file) # manage_upload_from_tmp doesn't exist in ExtFile2
if content_type:
- self._getOb(id).content_type=content_type
+ fob.content_type=content_type
+ logging.debug("manage_add: lastversion=%s"%self.getData())
+ logging.debug("reindex1: %s in %s"%(repr(self),repr(self.default_catalog)))
self.reindex_object()
- self._getOb(id).reindex_object()
+ logging.debug("manage_add: fob_data=%s"%fob.getData())
+ logging.debug("reindex2: %s in %s"%(repr(fob), repr(fob.default_catalog)))
+ fob.index_object()
if REQUEST is not None:
REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')
+
class CDLIFile(extVersionedFile,CatalogAware):
"""CDLI file"""
+ security=ClassSecurityInfo()
meta_type="CDLI file"
+ content_meta_type = ["CDLI File Object"]
+
default_catalog='CDLICatalog'
+ security.declareProtected('manage','index_html')
- #security.declarePublic('history')
def getLastVersionData(self):
"""get last version data"""
- return self.getLastVersion().getData()
+ return self.getData()
def getLastVersionFormattedData(self):
"""get last version data"""
- return self.getLastVersion().getFormattedData()
+ return self.getContentObject().getFormattedData()
#security.declarePublic('history')
-
-
def history(self):
"""history"""
@@ -1558,6 +1620,11 @@ class CDLIFile(extVersionedFile,CatalogA
#return [x.getObject() for x in context.CDLIBasketCatalog.search({'getFileNamesInLastVersion':self.getId()})]
+    def _newContentObject(self, id, title='', versionNumber=0, versionComment=None, time=None, author=None):
+        """Factory returning a new CDLIFileObject built from the given version metadata; derived classes may override it."""
+        return CDLIFileObject(id, title, author=author, time=time, versionComment=versionComment, versionNumber=versionNumber)
+
+
def addCDLIFileObjectForm(self):
"""add a new version"""
@@ -1583,58 +1650,27 @@ class CDLIFile(extVersionedFile,CatalogA
except:
pass
-
- if changeName=="yes":
- filename=file.filename
- self.title=filename[max(filename.rfind('/'),
- filename.rfind('\\'),
- filename.rfind(':'),
- )+1:]
-
+ ob = self.addContentObject(id, vC, author, file, title, changeName=changeName, newName=newName, from_tmp=from_tmp,
+ precondition=precondition, content_type=content_type)
- if not newName=='':
- self.title=newName[0:]
-
-
-
-
- positionVersionNum=getattr(self,'positionVersionNum','front')
-
- if positionVersionNum=='front':
- id="V%i"%self.getVersion()+"_"+self.title
- else:
- tmp=os.path.splitext(self.title)
- if len(tmp)>1:
- id=tmp[0]+"_V%i"%self.getVersion()+tmp[1]
- else:
- id=tmp[0]+"_V%i"%self.getVersion()
-
-
- manage_addCDLIFileObject(self,id,vC,author,file,id,precondition, content_type,from_tmp=from_tmp)
- #objs=self.ZopeFind(self,obj_ids=[id])[0][1].setVersionNumber(int(self.getVersion()))
- objs=getattr(self,id).setVersionNumber(int(self.getVersion()))
try:
- #FIXME: wozu ist das gut?
- self.REQUEST.SESSION['objID_parent']=self.getId()
+ #FIXME: wozu ist das gut?
+ self.REQUEST.SESSION['objID_parent']=self.getId()
except:
- pass
+ pass
if RESPONSE:
-
- obj=self.ZopeFind(self,obj_ids=[id])[0][1]
- if obj.getSize()==0:
- self.REQUEST.SESSION['objID']=obj.getId()
+ if ob.getSize()==0:
+ self.REQUEST.SESSION['objID']=ob.getId()
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','errorUploadFile')).__of__(self)
return pt()
-
else:
if come_from and (come_from!=""):
- RESPONSE.redirect(come_from+"?change="+self.getId())
+ RESPONSE.redirect(come_from+"?change="+self.getId())
else:
RESPONSE.redirect(self.REQUEST['URL2']+'?uploaded=%s'%self.title)
-
else:
- return self.ZopeFind(self,obj_ids=[id])[0][1]
+ return ob
def manage_addCDLIFileForm(self):
@@ -1648,8 +1684,6 @@ def manage_addCDLIFile(self,id,title,loc
tryToggle=True
tryCount=0
-
-
self._setObject(id,newObj)
getattr(self,id).reindex_object()
@@ -1740,12 +1774,14 @@ class CDLIFileFolder(extVersionedFileFol
security=ClassSecurityInfo()
meta_type="CDLI Folder"
- filesMetaType=['CDLI file']
- folderMetaType=['CDLI Folder']
+ file_meta_type=['CDLI file']
+ folder_meta_type=['CDLI Folder']
+
default_catalog='CDLICatalog'
- defaultFileCatalog=default_catalog #wenn dieses definiert ist, wird beim hinzufgen einer neuen version eines files dieser catalog neuiniziert
+ defaultFileCatalog=default_catalog #wenn dieses definiert ist, wird beim hinzufuegen einer neuen version eines files dieser catalog neuindiziert
#downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert.
tmpStore2={}
+
def setTemp(self,name,value):
"""set tmp"""
@@ -1753,21 +1789,13 @@ class CDLIFileFolder(extVersionedFileFol
def delete(self,ids):
- """delete this file, i.e. move into a trash folder"""
-
- found=self.ZopeFind(self,obj_ids=['.trash'])
-
- if len(found)<1:
- manage_addCDLIFileFolder(self, '.trash',title="Trash")
- trash=self._getOb('.trash')
- else:
- trash=found[0][1]
-
+ """delete these files"""
if type(ids) is not ListType:
ids=[ids]
- cut=self.manage_cutObjects(ids)
- trash.manage_pasteObjects(cut)
-
+
+ self.manage_delObjects(ids)
+
+
def getVersionNumbersFromIds(self,ids):
"""get the numbers of the current versions of documents described by their ids"""
@@ -1777,7 +1805,7 @@ class CDLIFileFolder(extVersionedFileFol
founds=self.CDLICatalog.search({'title':searchStr})
for found in founds:
- lastVersion=found.getObject().getLastVersion()
+ lastVersion=found.getObject().getContentObject()
ret.append((found.getId,lastVersion))
return ret
@@ -1788,7 +1816,7 @@ class CDLIFileFolder(extVersionedFileFol
if not founds:
return ""
else:
- obj=founds[0].getObject().getLastVersion()
+ obj=founds[0].getObject().getContentObject()
return obj.getData()[0:]
@@ -1916,7 +1944,7 @@ class CDLIFileFolder(extVersionedFileFol
#os.write(tf,obj.getLastVersion().data)
if RESPONSE:
- RESPONSE.write(obj.getLastVersion().getData()[0:])
+ RESPONSE.write(obj.getData()[0:])
RESPONSE.write("\n")
self.temp_folder.downloadCounter-=1
self._p_changed=1
@@ -1936,7 +1964,7 @@ class CDLIFileFolder(extVersionedFileFol
def hasParent(self):
"""returns true falls subfolder"""
- if self.aq_parent.meta_type in self.folderMetaType:
+ if self.aq_parent.meta_type in self.folder_meta_type:
return True
else:
return False
@@ -1944,16 +1972,16 @@ class CDLIFileFolder(extVersionedFileFol
def getFolders(self):
"""get all subfolders"""
ret=[]
- folders=self.ZopeFind(self,obj_metatypes=self.folderMetaType)
+ folders=self.ZopeFind(self,obj_metatypes=self.folder_meta_type)
for folder in folders:
ret.append((folder[1],
- len(self.ZopeFind(folder[1],obj_metatypes=self.folderMetaType)),
- len(self.ZopeFind(folder[1],obj_metatypes=self.filesMetaType))
+ len(self.ZopeFind(folder[1],obj_metatypes=self.folder_meta_type)),
+ len(self.ZopeFind(folder[1],obj_metatypes=self.file_meta_type))
))
return ret
- security.declareProtected('View','index_html')
+ security.declareProtected('manage','index_html')
def index_html(self):
"""main"""
ext=self.ZopeFind(self,obj_ids=["index.html"])
@@ -2000,162 +2028,21 @@ class CDLIRoot(Folder):
"""main folder for cdli"""
meta_type="CDLIRoot"
- downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible
+ downloadCounterBaskets=0 # counts the current basket downloads if counter > 10 no downloads are possible
def deleteFiles(self,ids):
- """delete files (resp. move into .trash folder)"""
- # find or generete trash folder
-
- found=self.ZopeFind(self,obj_ids=['.trash'])
-
- if len(found)<1:
- manage_addCDLIFileFolder(self, '.trash',title="Trash")
- trash=self._getOb('.trash')
- else:
- logging.info(found)
- trash=found[0][1]
-
-
+ """delete files"""
for id in ids:
founds=self.CDLICatalog.search({'title':id.split(".")[0]})
if founds:
- logging.info(founds)
+ logging.debug("deleting %s"%founds)
folder=founds[0].getObject().aq_parent #get the parent folder of the object
- logging.info(folder)
- cut=folder.manage_cutObjects([founds[0].getId]) #cut it out
- trash.manage_pasteObjects(cut) #paste it in the trash
+ logging.debug("deleting from %s"%folder)
+ cut=folder.delete([founds[0].getId]) #cut it out
- def findWordRegExp(self,indexName,searchTerm):
- """find all words in index which match regexp in SearchTerm
- @param indexName: name of the index to be searched in
- @param searchTerm: word to be searched"""
-
- ret=[]
- for x in self.lineIndexes[indexName].iterkeys():
- if re.match(searchTerm,x):
- ret.append(x)
- return ret
-
- def searchRegExpInLineIndexDocs(self,indexName,searchTerm):
- """search in inLineIndex with regexp
- @param indexName: name of the index to be searched in
- @param searchTerm: term to be searched
- """
- if not searchTerm:
- return []
- ret=[]
- words=self.findWordRegExp(indexName,searchTerm) # suche nach allen Treffern
- logging.info("wd:%s"%words)
- for word in words:
- ret+=self.searchInLineIndexDocs(indexName,word)
-
- return unique(ret)
-
- def showInLineIndex(self):
- """get the index for debug purposes"""
- print "show"
- for key in self.lineIndexes.keys():
- logging.info("index:%s"%key)
- for x in self.lineIndexes[key].iterkeys():
- logging.info("word:%s"%repr(x))
- #for y in self.lineIndex[x].iterkeys():
- # print "doc",repr(y),repr(self.lineIndex[x][y])
-
- return self.lineIndexes
-
- def searchInLineIndexDocs(self,indexName,word,uniq=True,regExp=False):
- """search occurences in an index
- @param indexName: name of the index to be searched in
- @param word: word to be searched
- @param unique: (optional) unify the list of results
- @param regExp: (optional) use regular expressions
- """
- if regExp:
- return self.searchRegExpInLineIndexDocs(indexName,word)
-
- try:
-
- lst=list(self.lineIndexes[indexName].get(word).keys())
- except:
- logging.error("error: searchInLineIndexDocs (%s %s)"%(sys.exc_info()[0:2]))
- lst=[]
- if uniq:
- return unique(lst)
- else:
- return lst
-
- def getLinesFromIndex(self,indexName,word,doc,regExp=False):
- """return all lines from a document where word is found
- @param indexName: Name of the index
- @param word: word to be searched
- @param doc: name of the document (usuallay the p-number)
- @param regExp: (optional) use regExp
- """
-
- if not regExp:
- return self.lineIndexes[indexName].get(word)[doc]
- else: # wenn regexp, suche welches word
- for w in self.findWordRegExp(indexName,word):
- if self.lineIndexes[indexName].get(w): # ein word in im dex gefunden
- try:
- dc=self.lineIndex[indexName].get(word)[doc]
- return dc # und ein document dann gib es zurueck
- except:
- pass #andernfalls weiter
-
- def cleanInLineIndex(self,indexName):
- """empty an InlineIndex
- @param indexName: name of the index
- """
- for x in list(self.lineIndexes[indexName].keys()):
- del(self.lineIndexes[indexName][x])
- print [x for x in self.lineIndexes[indexName].keys()]
-
- return "ok"
-
- def storeInLineIndex(self,indexName,key,value):
- """store in index, key is normally a word or grapheme
- and value is a tuple (documentname, line) where the word can be found
- @param indexName: name of the index
- @param key: key in index
- @param value: value in index, value is a tuple (document name, line)
- """
- logging.error("indexing: %s %s"%(indexName,key))
- if (not hasattr(self,'lineIndexes')):
-
- self.lineIndexes={}
-
- if self.lineIndexes.get(indexName,None) is None:
- #index exisitiert noch nicht dann anlegen
-
- self.lineIndexes[indexName]=OOBTree()
- lis=self.lineIndexes
- li=lis[indexName]
-
- if li.has_key(key):
-
-# if li[key].has_key(value[0]) and (not (value[1] in li[key][value[0]])):
- if li[key].has_key(value[0]):
- tmp=li[key][value[0]]
- tmp.append(value[1]) # add it if now in the array
- li[key][value[0]]=tmp[0:]
- else:
- li[key][value[0]]=[value[1]] # new array for lines
-
- else:
-
- li[key]=OOBTree()# new btree for lines
- li[key][value[0]]=[value[1]]
-
-
- self.lineIndexes=lis
-
- transaction.get().commit()
-
-
- def showFile(self,fileId):
+ def showFile(self,fileId,wholePage=False):
"""show a file
@param fileId: P-Number of the document to be displayed
"""
@@ -2163,14 +2050,18 @@ class CDLIRoot(Folder):
if not f:
return ""
- return f[0].getObject().getLastVersionFormattedData()
+ if wholePage:
+ logging.debug("show whole page")
+ return f[0].getObject().getContentObject().view()
+ else:
+ return f[0].getObject().getLastVersionFormattedData()
- def showWordInFile(self,fileId,word,lineList=None,regExp=True,indexName=""):
- """get lines with word fromFileId"""
+ def showWordInFile(self,fileId,word,lineList=None,regExp=False,indexName=""):
+ """get lines with word from FileId"""
file=self.showFile(fileId)
-
+ logging.debug("show word regEXP %s"%regExp)
ret=[]
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen
wordlist=self.findWordRegExp(indexName,word)
@@ -2180,7 +2071,8 @@ class CDLIRoot(Folder):
for line in file.split("\n"):
found=False
for word in wordlist:
- if line.find(word)>-1:
+ try: # just a hack because of possible unicode errors in line
+ if line.find(word)>-1:
if lineList: #liste of moeglichen Zeilennummern
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile
@@ -2191,13 +2083,15 @@ class CDLIRoot(Folder):
ret.append(line)
break;
+ except:
+ pass
return ret
- def tagWordInFile(self,fileId,word,lineList=None,regExp=True,indexName=""):
- """get lines with word fromFileId"""
+ def tagWordInFile(self,fileId,word,lineList=None,regExp=False,indexName=""):
+ """get text with word highlighted from FileId"""
file=self.showFile(fileId)
- tagStr="""%s"""
+ tagStr=u'%s'
ret=[]
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen
@@ -2206,6 +2100,7 @@ class CDLIRoot(Folder):
wordlist=[word]
for line in file.split("\n"):
+ line = unicodify(line)
found=False
for word in wordlist:
if line.find(word)>-1: #word ist gefunden dann makiere und breche die Schleife ab
@@ -2223,7 +2118,7 @@ class CDLIRoot(Folder):
if not found: #word wurde nicht gefunden keine makierung
ret.append(line)
- return "
\n".join(ret)
+ return u'
\n'.join(ret)
def URLquote(self,str):
"""quote url"""