version 1.65, 2007/02/08 10:46:36
|
version 1.71, 2007/03/21 19:29:23
|
Line 25 from BTrees.OOBTree import OOBTree
|
Line 25 from BTrees.OOBTree import OOBTree
|
import logging |
import logging |
import transaction |
import transaction |
import copy |
import copy |
|
import codecs |
|
|
def unique(s): |
def unique(s): |
"""Return a list of the elements in s, but without duplicates. |
"""Return a list of the elements in s, but without duplicates. |
Line 424 class uploadATFThread(Thread):
|
Line 425 class uploadATFThread(Thread):
|
#if not than add filename to the list of newfiles |
#if not than add filename to the list of newfiles |
|
|
data=file(os.path.join(dir,fn)).read() |
data=file(os.path.join(dir,fn)).read() |
#status,msg=checkFile(fn,data,dir) |
status,msg=checkFile(fn,data,dir) |
status=True |
#status=True |
msg="" |
|
|
|
if not status: # error |
if not status: # error |
errors.append((fn,msg)) |
errors.append((fn,msg)) |
|
|
else: |
else: |
if len(founds)==0: |
if len(founds)==0: |
newPs.append(fn) |
newPs.append(fn) |
Line 632 class CDLIBasketContainer(OrderedFolder)
|
Line 635 class CDLIBasketContainer(OrderedFolder)
|
|
|
return baskets |
return baskets |
|
|
|
def joinBasket(self,newbasket,oldbaskets,delete=False):
    """Join several baskets into one new basket.

    NOTE(review): in this revision the method consists of this docstring
    only, so calling it does nothing and returns None -- confirm whether
    the implementation is still pending.

    @param newbasket: name of the new basket
    @param oldbaskets: list of baskets to be joined
    @param delete: (optional) delete the old baskets
    """
|
|
|
|
def getNewId(self): |
def getNewId(self): |
Line 1182 class CDLIFileObject(CatalogAware,extVer
|
Line 1191 class CDLIFileObject(CatalogAware,extVer
|
def getFormattedData(self): |
def getFormattedData(self): |
"""fromat text""" |
"""fromat text""" |
data=self.getData() |
data=self.getData() |
return re.sub("\s\#lem"," #lem",data) #remove return vor #lem |
# return re.sub("\s\#lem"," #lem",data) #remove return vor #lem |
|
return re.sub("#lem"," #lem",data) #remove return vor #lem |
|
|
def view(self): |
def view(self): |
"""view file""" |
"""view file""" |
Line 1419 def manage_addCDLIFile(self,id,title,loc
|
Line 1429 def manage_addCDLIFile(self,id,title,loc
|
if RESPONSE is not None: |
if RESPONSE is not None: |
RESPONSE.redirect('manage_main') |
RESPONSE.redirect('manage_main') |
|
|
|
def checkUTF8(data):
    """Tell whether data is valid UTF-8.

    Raw byte strings (what checkFile passes in after reading the upload
    from disk) are validated by decoding them. The previous implementation
    called encode() on them, which first decodes implicitly as ASCII and
    therefore rejected every valid UTF-8 text containing a non-ASCII
    character.

    @param data: byte string or unicode text to be checked
    @return: True if data is valid UTF-8, False otherwise (also for
             objects that are not strings at all)
    """
    try:
        if isinstance(data, type(u"")):
            # already decoded text: only has to be representable in UTF-8
            data.encode('utf-8')
        else:
            # raw bytes: must decode cleanly
            data.decode('utf-8')
    except (UnicodeError, AttributeError):
        # UnicodeError: invalid byte sequence; AttributeError: not a string
        return False
    return True
|
|
|
|
def checkFile(filename,data,folder): |
def checkFile(filename,data,folder): |
Line 1430 def checkFile(filename,data,folder):
|
Line 1447 def checkFile(filename,data,folder):
|
return False,"P missing in the filename" |
return False,"P missing in the filename" |
elif len(fn[0])!=7: |
elif len(fn[0])!=7: |
return False,"P number has not the right length 6" |
return False,"P number has not the right length 6" |
else: |
elif not checkUTF8(data): |
fn=os.path.join(folder,filename) |
return False,"not utf-8" |
stin,out=os.popen4("/usr/bin/atfcheck.plx %s"%fn) |
|
value=out.read() |
|
ret= out.close() |
|
|
|
if value: |
|
|
|
return False,"atf checker error: %s"%value |
|
else: |
else: |
return True,"" |
return True,"" |
|
|
|
|
def splitatf(fh,dir=None,ext=None): |
def splitatf(fh,dir=None,ext=None): |
"""split it""" |
"""split it""" |
ret=None |
ret=None |
Line 1449 def splitatf(fh,dir=None,ext=None):
|
Line 1460 def splitatf(fh,dir=None,ext=None):
|
i=0 |
i=0 |
|
|
for lineTmp in fh.readlines(): |
for lineTmp in fh.readlines(): |
|
lineTmp=lineTmp.replace(codecs.BOM_UTF8,'') # make sure that all BOM are removed.. |
for line in lineTmp.split("\r"): |
for line in lineTmp.split("\r"): |
|
#logging.log("Deal with: %s"%line) |
if ext: |
if ext: |
i+=1 |
i+=1 |
if (i%100)==0: |
if (i%100)==0: |
Line 1534 class CDLIFileFolder(extVersionedFileFol
|
Line 1547 class CDLIFileFolder(extVersionedFileFol
|
|
|
return ret |
return ret |
|
|
|
def getFile(self,fn):
    """Return the data of the last version of the file titled fn.

    @param fn: title under which the file is looked up in CDLICatalog
    @return: a copy of the data of the last version of the first catalog
             hit, or an empty list if the catalog has no hit
    """
    hits = self.CDLICatalog.search({'title': fn})
    if not hits:
        return []

    latest = hits[0].getObject().getLastVersion()
    return latest.getData()[0:]
|
|
def checkCatalog(self,fn): |
def checkCatalog(self,fn): |
"""check if fn is in the catalog""" |
"""check if fn is in the catalog""" |
#TODO add checkCatalog |
#TODO add checkCatalog |
Line 1556 class CDLIFileFolder(extVersionedFileFol
|
Line 1579 class CDLIFileFolder(extVersionedFileFol
|
return pt(search=list,author=author) |
return pt(search=list,author=author) |
|
|
|
|
|
def getAllPNumbers(self):
    """Return the ids (p-numbers) of all files found in CDLICatalog.

    @return: list with the getId attribute of every catalog result
    """
    numbers = []
    for brain in self.CDLICatalog():
        # getId is read as an attribute, not called -- presumably a
        # metadata column of the catalog; verify against the catalog setup
        numbers.append(brain.getId)
    return numbers
|
|
def findObjectsFromList(self,enterList=None,display=False,start=None,upload=None,list=None,basketName=None,numberOfObjects=None,RESPONSE=None): |
def findObjectsFromList(self,enterList=None,display=False,start=None,upload=None,list=None,basketName=None,numberOfObjects=None,RESPONSE=None): |
"""findObjectsFromList (, TAB oder LINE separated)""" |
"""findObjectsFromList (, TAB oder LINE separated)""" |
Line 1653 class CDLIFileFolder(extVersionedFileFol
|
Line 1682 class CDLIFileFolder(extVersionedFileFol
|
#os.write(tf,obj.getLastVersion().data) |
#os.write(tf,obj.getLastVersion().data) |
if RESPONSE: |
if RESPONSE: |
RESPONSE.write(obj.getLastVersion().getData()[0:]) |
RESPONSE.write(obj.getLastVersion().getData()[0:]) |
|
RESPONSE.write("\n") |
self.temp_folder.downloadCounter-=1 |
self.temp_folder.downloadCounter-=1 |
self._p_changed=1 |
self._p_changed=1 |
transaction.get().commit() |
transaction.get().commit() |
Line 1737 class CDLIRoot(Folder):
|
Line 1767 class CDLIRoot(Folder):
|
meta_type="CDLIRoot" |
meta_type="CDLIRoot" |
downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible |
downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible |
|
|
def findWordRegExp(self,indexName,searchTerm):
    """Collect all words of an index that match a regular expression.

    The expression is applied with re.match, i.e. it is anchored at the
    beginning of each word.

    @param indexName: name of the index to be searched in
    @param searchTerm: regular expression the words are matched against
    @return: list of the matching words
    """
    # iterating the index yields its keys, i.e. the indexed words
    return [entry for entry in self.lineIndexes[indexName]
            if re.match(searchTerm, entry)]
|
|
def searchRegExpInLineIndexDocs(self,indexName,searchTerm):
    """Search an index with a regular expression.

    Every word of the index matching searchTerm is looked up with
    searchInLineIndexDocs and the results are merged.

    @param indexName: name of the index to be searched in
    @param searchTerm: regular expression to be searched
    @return: list of the results without duplicates, [] for an empty
             searchTerm
    """
    if not searchTerm:
        return []

    matching = self.findWordRegExp(indexName, searchTerm) # look up all hits
    logging.info("wd:%s" % matching)

    docs = []
    for hit in matching:
        docs.extend(self.searchInLineIndexDocs(indexName, hit))

    return unique(docs)
|
|
def showInLineIndex(self):
    """Dump all line indexes to the log (for debugging purposes).

    Prints "show", then logs the name of every index and the repr of
    every word stored in it.

    @return: the mapping of all indexes (self.lineIndexes)
    """
    print("show")
    for name, index in self.lineIndexes.items():
        logging.info("index:%s" % name)
        for entry in index:
            logging.info("word:%s" % repr(entry))

    return self.lineIndexes
|
|
def searchInLineIndexDocs(self,indexName,word,uniq=True,regExp=False):
    """Search the occurrences of a word in an index.

    @param indexName: name of the index to be searched in
    @param word: word to be searched
    @param uniq: (optional) unify the list of results
    @param regExp: (optional) treat word as a regular expression; the
                   search is then delegated to searchRegExpInLineIndexDocs
    @return: list of the keys stored for word in the index, [] if the
             index or the word does not exist
    """
    if regExp:
        return self.searchRegExpInLineIndexDocs(indexName,word)

    try:
        lst=list(self.lineIndexes[indexName].get(word).keys())
    except (KeyError, AttributeError, TypeError):
        # KeyError: unknown index; AttributeError: word not in the index
        # (get() returned None) or no index stored yet. Anything else
        # (e.g. database conflicts) is deliberately no longer swallowed.
        logging.error("error: searchInLineIndexDocs (%s %s)"%(sys.exc_info()[0:2]))
        lst=[]

    if uniq:
        return unique(lst)
    return lst
|
|
def getLinesFromIndex(self,indexName,word,doc,regExp=False):
    """Return all lines of a document where word is found.

    @param indexName: name of the index
    @param word: word to be searched (a regular expression if regExp is set)
    @param doc: name of the document (usually the p-number)
    @param regExp: (optional) use regExp
    @return: the lines stored in the index for word and doc. With regExp
             the lines of the first matching index word that occurs in
             doc, or None if no matching word occurs in doc.
    """
    index=self.lineIndexes[indexName]

    if not regExp:
        return index.get(word)[doc]

    # regexp: find out which of the matching index words occurs in doc
    for w in self.findWordRegExp(indexName,word):
        entry=index.get(w)
        if entry: # a matching word exists in the index
            try:
                # look up the matched word w (not the pattern) in the
                # index selected by indexName
                return entry[doc]
            except (KeyError, TypeError):
                pass # w does not occur in doc, try the next match

    return None
|
|
def cleanInLineIndex(self,indexName):
    """Empty an InlineIndex.

    Removes every key of the index and prints the remaining keys as a
    debugging aid.

    @param indexName: name of the index
    @return: the string "ok"
    """
    index = self.lineIndexes[indexName]

    # take a snapshot of the keys, entries are deleted while walking them
    for key in list(index.keys()):
        del index[key]

    print(list(index.keys()))

    return "ok"
|
|
def storeInLineIndex(self,key,value): |
def storeInLineIndex(self,indexName,key,value): |
"""store in index""" |
"""store in index, key is normally a word or grapheme |
|
and value is a tuple (documentname, line) where the word can be found |
|
@param indexName: name of the index |
|
@param key: key in index |
|
@param value: value in index, value is a tuple (document name, line) |
|
""" |
|
logging.error("indexing: %s %s"%(indexName,key)) |
|
if (not hasattr(self,'lineIndexes')): |
|
|
if (not hasattr(self,'lineIndex')) or (type(self.lineIndex) is DictType): |
self.lineIndexes={} |
self.lineIndex=OOBTree() |
|
li=self.lineIndex |
if self.lineIndexes.get(indexName,None) is None: |
|
#index exisitiert noch nicht dann anlegen |
|
|
|
self.lineIndexes[indexName]=OOBTree() |
|
lis=self.lineIndexes |
|
li=lis[indexName] |
|
|
if li.has_key(key): |
if li.has_key(key): |
|
|
Line 1826 class CDLIRoot(Folder):
|
Line 1891 class CDLIRoot(Folder):
|
li[key][value[0]]=[value[1]] |
li[key][value[0]]=[value[1]] |
|
|
|
|
self.lineIndex=li |
self.lineIndexes=lis |
|
|
transaction.get().commit() |
transaction.get().commit() |
|
|
|
|
def showFile(self,fileId):
    """Return the formatted data of the last version of a file.

    @param fileId: P-Number of the document to be displayed
    @return: result of getLastVersionFormattedData() of the first catalog
             hit for the title fileId, "" if there is no hit
    """
    hits = self.CDLICatalog({'title': fileId})
    if hits:
        return hits[0].getObject().getLastVersionFormattedData()

    return ""
|
|
def showLineFromFile(self,fileId,lineNum,word): |
|
"""get line lineNum fromFileId""" |
|
|
|
file=self.showFile(fileId) |
def showWordInFile(self,fileId,word,lineList=None,regExp=True,indexName=""): |
#str="^%s\.[^%s\.]*%s[^\n]*\n"%(lineNum,lineNum,word) |
|
#str="^%s\..*?%s[^\n]*\n"%(lineNum,word) |
|
|
|
#print str |
|
#m=re.search(str,file,flags=re.M|re.DOTALL) |
|
#if m: |
|
# return m.group() |
|
#else: |
|
# return "" |
|
#ret=lineNum+"." |
|
#splitted=file.split(lineNum+".") |
|
#if len(splitted)>1: |
|
#for part in splitted[1:]: |
|
#if part.find(word)>-1: |
|
# for x in part.split("\n"): |
|
#ret+=x |
|
#if x.find(word)>-1: |
|
#break |
|
#break; |
|
#return ret |
|
|
|
def showWordInFile(self,fileId,word,lineList=None): |
|
"""get lines with word fromFileId""" |
"""get lines with word fromFileId""" |
|
|
file=self.showFile(fileId) |
file=self.showFile(fileId) |
|
|
ret=[] |
ret=[] |
|
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen |
|
wordlist=self.findWordRegExp(indexName,word) |
|
else: |
|
wordlist=[word] |
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
|
found=False |
|
for word in wordlist: |
if line.find(word)>-1: |
if line.find(word)>-1: |
if lineList: #liste of moeglichen Zeilennummern |
if lineList: #liste of moeglichen Zeilennummern |
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile |
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile |
Line 1880 class CDLIRoot(Folder):
|
Line 1930 class CDLIRoot(Folder):
|
ret.append(line) |
ret.append(line) |
else: # nimm alles ohne line check |
else: # nimm alles ohne line check |
ret.append(line) |
ret.append(line) |
|
|
|
break; |
return ret |
return ret |
|
|
def tagWordInFile(self,fileId,word,lineList=None): |
def tagWordInFile(self,fileId,word,lineList=None,regExp=True,indexName=""): |
"""get lines with word fromFileId""" |
"""get lines with word fromFileId""" |
|
|
file=self.showFile(fileId) |
file=self.showFile(fileId) |
tagStr="""<span class="found">%s</span>""" |
tagStr="""<span class="found">%s</span>""" |
ret=[] |
ret=[] |
|
|
|
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen |
|
wordlist=self.findWordRegExp(indexName,word) |
|
else: |
|
wordlist=[word] |
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
if line.find(word)>-1: |
found=False |
|
for word in wordlist: |
|
if line.find(word)>-1: #word ist gefunden dann makiere und breche die Schleife ab |
if lineList: #liste of moeglichen Zeilennummern |
if lineList: #liste of moeglichen Zeilennummern |
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile |
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile |
|
|
if num in lineList: |
if num in lineList: |
|
|
ret.append(line.replace(word,tagStr%word)) |
ret.append(line.replace(word,tagStr%word)) |
|
|
else: # nimm alles ohne line check |
else: # nimm alles ohne line check |
ret.append(line.replace(word,tagStr%word)) |
ret.append(line.replace(word,tagStr%word)) |
else: |
found=True |
|
break |
|
if not found: #word wurde nicht gefunden keine makierung |
ret.append(line) |
ret.append(line) |
|
|
return "<br>\n".join(ret) |
return "<br>\n".join(ret) |
|
|
def URLquote(self,str): |
def URLquote(self,str): |
Line 1930 class CDLIRoot(Folder):
|
Line 1994 class CDLIRoot(Folder):
|
|
|
return ret |
return ret |
|
|
def forceDahl(self):
    """Break the locks of all CDLI files whose lockedBy is "dahl".

    @return: list of tuples (first item of the ZopeFind result,
             result of forceunlock()) for every file where forceunlock()
             returned a non-empty value
    """
    released = []

    for name, obj in self.ZopeFind(self,obj_metatypes="CDLI file",search_sub=1):
        if str(obj.lockedBy) != "dahl":
            continue

        outcome = obj.forceunlock()
        if outcome and outcome != "":
            released.append((name, outcome))

    return released
|
|
|
def getChangesByAuthor(self,author,n=100): |
def getChangesByAuthor(self,author,n=100): |
"""getChangesByAuthor""" |
"""getChangesByAuthor""" |
Line 2005 class CDLIRoot(Folder):
|
Line 2058 class CDLIRoot(Folder):
|
|
|
|
|
def uploadATF(self,repeat=None,upload=None,basketId=0,RESPONSE=None): |
def uploadATF(self,repeat=None,upload=None,basketId=0,RESPONSE=None): |
"""standard ausgabe""" |
"""upload an atf file / basket file""" |
#self._v_uploadATF.returnValue=None |
#self._v_uploadATF.returnValue=None |
|
|
|
#generate an random id for the upload thread |
from random import randint |
from random import randint |
if (not self.REQUEST.SESSION.get('idTmp',None)): |
if (not self.REQUEST.SESSION.get('idTmp',None)): |
|
|
Line 2015 class CDLIRoot(Folder):
|
Line 2070 class CDLIRoot(Folder):
|
else: |
else: |
idTmp=self.REQUEST.SESSION.get('idTmp',None) |
idTmp=self.REQUEST.SESSION.get('idTmp',None) |
|
|
|
|
threadName=repeat |
threadName=repeat |
if not threadName or threadName=="": |
if not threadName or threadName=="": |
|
#new thread not called from the waiting page |
tmpVar=False |
tmpVar=False |
|
|
thread=uploadATFThread() |
thread=uploadATFThread() |
Line 2064 class CDLIRoot(Folder):
|
Line 2121 class CDLIRoot(Folder):
|
return pt(txt='/uploadATF',threadName=threadName) |
return pt(txt='/uploadATF',threadName=threadName) |
|
|
else: |
else: |
# tmp={} |
|
# for key in self._v_uploadATF[threadName].returnValue.keys(): |
|
# t=self._v_uploadATF[threadName].returnValue[key] |
|
# if type(t) is ListType: |
|
# tmp[key]=self._v_uploadATF[threadName].returnValue[key][0:] |
|
# else: |
|
# tmp[key]=self._v_uploadATF[threadName].returnValue[key] |
|
# repr(tmp[key]),repr(key) |
|
# |
|
# # |
|
#tmp=self.cdli_main.tmpStore2[threadName] |
|
|
|
tmp=getattr(self.temp_folder,idTmp).returnValue |
tmp=getattr(self.temp_folder,idTmp).returnValue |
|
|
|
|
|
|
|
|
#del(self.cdli_main.tmpStore2[threadName]) |
|
|
|
|
|
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadCheck.zpt')).__of__(self) |
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadCheck.zpt')).__of__(self) |
|
|
return pt(changed=tmp['changed'],lockerrors=tmp['lockerrors'],errors=tmp['errors'],dir=tmp['dir'],newPs=tmp['newPs'],basketLen=tmp['basketLen'],numberOfFiles=tmp['numberOfFiles'], |
return pt(changed=tmp['changed'],lockerrors=tmp['lockerrors'],errors=tmp['errors'],dir=tmp['dir'],newPs=tmp['newPs'],basketLen=tmp['basketLen'],numberOfFiles=tmp['numberOfFiles'], |