version 1.80.2.4, 2007/10/24 20:36:07
|
version 1.81, 2008/01/21 17:19:01
|
Line 53 def utf8ify(s):
|
Line 53 def utf8ify(s):
|
else: |
else: |
return s.encode('utf-8') |
return s.encode('utf-8') |
|
|
def formatAtfLineHtml(l, nolemma=True): |
def formatAtfHtml(l): |
"""escape special ATF characters for HTML""" |
"""escape special ATF characters for HTML""" |
if not l: |
if not l: |
return "" |
return "" |
|
|
if nolemma: |
|
# ignore lemma lines |
|
if l.lstrip().startswith('#lem:'): |
|
return "" |
|
# replace & |
# replace & |
l = l.replace('&','&') |
l = l.replace('&','&') |
# replace angular brackets |
# replace angular brackets |
Line 69 def formatAtfLineHtml(l, nolemma=True):
|
Line 65 def formatAtfLineHtml(l, nolemma=True):
|
l = l.replace('>','>') |
l = l.replace('>','>') |
return l |
return l |
|
|
|
def formatAtfLineHtml(l, nolemma=True): |
|
"""format ATF line for HTML""" |
|
if not l: |
|
return "" |
|
|
|
if nolemma: |
|
# ignore lemma lines |
|
if l.lstrip().startswith('#lem:'): |
|
return "" |
|
|
|
return formatAtfHtml(l) |
|
|
|
|
|
|
|
def formatAtfFullLineNum(txt, nolemma=True): |
|
"""format full line numbers in ATF text""" |
|
# surface codes |
|
surfaces = {'@obverse':'obv', |
|
'@reverse':'rev', |
|
'@surface':'surface', |
|
'@edge':'edge', |
|
'@left':'left', |
|
'@right':'right', |
|
'@top':'top', |
|
'@bottom':'bottom', |
|
'@face':'face', |
|
'@seal':'seal'} |
|
|
|
if not txt: |
|
return "" |
|
|
|
ret = [] |
|
surf = "" |
|
col = "" |
|
for line in txt.splitlines(): |
|
line = unicodify(line) |
|
if line and line[0] == '@': |
|
# surface or column |
|
words = line.split(' ') |
|
if words[0] in surfaces: |
|
surf = line.replace(words[0],surfaces[words[0]]).strip() |
|
|
|
elif words[0] == '@column': |
|
col = ' '.join(words[1:]) |
|
|
|
elif line and line[0] in '123456789': |
|
# ordinary line -> add line number |
|
line = "%s:%s:%s"%(surf,col,line) |
|
|
|
ret.append(line) |
|
|
|
return '\n'.join(ret) |
|
|
|
|
def generateXMLReturn(hash): |
def generateXMLReturn(hash): |
"""erzeugt das xml file als returnwert fuer uploadATFRPC""" |
"""erzeugt das xml file als returnwert fuer uploadATFRPC""" |
Line 95 def generateXMLReturn(hash):
|
Line 144 def generateXMLReturn(hash):
|
return ret |
return ret |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def unique(s): |
def unique(s): |
"""Return a list of the elements in s, but without duplicates. |
"""Return a list of the elements in s, but without duplicates. |
|
|
Line 272 class uploadATFfinallyThread(Thread):
|
Line 313 class uploadATFfinallyThread(Thread):
|
self.result+="<h2>Start processing</h2>" |
self.result+="<h2>Start processing</h2>" |
|
|
#shall I only upload the changed files? |
#shall I only upload the changed files? |
logging.info("uploadATFfinally procedure: %s"%procedure) |
logging.debug("uploadATFfinally procedure: %s"%procedure) |
if procedure=="uploadchanged": |
if procedure=="uploadchanged": |
changed=[x[0] for x in SESSION.get('changed',[])] |
changed=[x[0] for x in SESSION.get('changed',[])] |
uploadFns=changed+SESSION.get('newPs',[]) |
uploadFns=changed+SESSION.get('newPs',[]) |
Line 293 class uploadATFfinallyThread(Thread):
|
Line 334 class uploadATFfinallyThread(Thread):
|
#do first the changed files |
#do first the changed files |
i=0 |
i=0 |
for fn in uploadFns: |
for fn in uploadFns: |
|
logging.debug("uploadATFfinally uploadFn=%s"%fn) |
i+=1 |
i+=1 |
founds=ctx2.CDLICatalog.search({'title':fn}) |
founds=ctx2.CDLICatalog.search({'title':fn}) |
if len(founds)>0: |
if len(founds)>0: |
SESSION['author']=str(username) |
SESSION['author']=str(username) |
self.result="<p>Changing : %s"%fn+self.result |
self.result="<p>Changing : %s"%fn+self.result |
|
logging.debug("uploadatffinallythread changing:%s"%fn+self.result) |
founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn),from_tmp=True) |
founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn),from_tmp=True) |
if i==200: |
if i%200==0: |
i=0 |
|
transaction.get().commit() |
transaction.get().commit() |
logging.info("changing: do commit") |
logging.debug("uploadatffinallythread changing: do commit") |
|
|
transaction.get().commit() |
transaction.get().commit() |
logging.info("changing: last commit") |
logging.debug("uploadatffinallythread changing: last commit") |
|
|
#now add the new files |
#now add the new files |
newPs=SESSION['newPs'] |
newPs=SESSION['newPs'] |
if len(newPs)>0: |
if len(newPs)>0: |
tmpDir=SESSION['tmpdir'] |
tmpDir=SESSION['tmpdir'] |
logging.info("adding start") |
logging.debug("uploadatffinallythread adding start") |
self.result="<p>Adding files</p>"+self.result |
self.result="<p>Adding files</p>"+self.result |
#TODO: make this configurable, at the moment base folder for the files has to be cdli_main |
#TODO: make this configurable, at the moment base folder for the files has to be cdli_main |
|
|
ctx2.importFiles(comment=comment,author=str(username) ,folderName=tmpDir, files=newPs,ext=self) |
ctx2.importFiles(comment=comment,author=str(username) ,folderName=tmpDir, files=newPs,ext=self) |
logging.info("adding finished") |
logging.debug("uploadatffinallythread adding finished") |
|
|
|
|
#unlock locked files? |
#unlock locked files? |
if unlock: |
if unlock: |
logging.info("unlocking start") |
logging.debug("uploadatffinallythread unlocking start") |
self.result="<p>Unlock files</p>"+self.result |
self.result="<p>Unlock files</p>"+self.result |
unlockFns=[] |
unlockFns=[] |
for x in os.listdir(SESSION['tmpdir']): |
for x in os.listdir(SESSION['tmpdir']): |
if not x in SESSION['errors']: |
if not x in SESSION['errors']: |
unlockFns.append(x) |
unlockFns.append(x) |
logging.info("unlocking have now what to unlock") |
|
|
logging.debug("unlocking have now what to unlock") |
|
|
for fn in unlockFns: |
for fn in unlockFns: |
#logging.info("will unlock: %s"%fn) |
#logging.info("will unlock: %s"%fn) |
Line 336 class uploadATFfinallyThread(Thread):
|
Line 377 class uploadATFfinallyThread(Thread):
|
if len(founds)>0: |
if len(founds)>0: |
#logging.info("unlock: %s"%founds[0].getObject().getId()) |
#logging.info("unlock: %s"%founds[0].getObject().getId()) |
SESSION['author']=str(username) |
SESSION['author']=str(username) |
|
|
founds[0].getObject().lockedBy="" |
founds[0].getObject().lockedBy="" |
logging.info("unlocking done") |
|
|
logging.debug("uploadatffinallythread unlocking done") |
|
|
#if a basketname is given, add files to the basket |
#if a basketname is given, add files to the basket |
if not (basketname ==''): |
if not (basketname ==''): |
logging.info("add to basket %s"%basketname) |
logging.debug("uploadatffinallythread add to basket %s"%basketname) |
self.result="<p>Add to basket</p>"+self.result |
self.result="<p>Add to basket</p>"+self.result |
basketId=ctx2.basketContainer.getBasketIdfromName(basketname) |
basketId=ctx2.basketContainer.getBasketIdfromName(basketname) |
|
|
if not basketId: # create new basket |
if not basketId: # create new basket |
logging.info("create basket %s"%basketname) |
logging.debug("uploadatffinallythread create basket %s"%basketname) |
self.result="<p>Create a new basket</p>"+self.result |
self.result="<p>Create a new basket</p>"+self.result |
ob=ctx2.basketContainer.addBasket(basketname) |
ob=ctx2.basketContainer.addBasket(basketname) |
basketId=ob.getId() |
basketId=ob.getId() |
Line 355 class uploadATFfinallyThread(Thread):
|
Line 396 class uploadATFfinallyThread(Thread):
|
ids=os.listdir(SESSION['tmpdir']) |
ids=os.listdir(SESSION['tmpdir']) |
basket.addObjects(ids,deleteOld=True,username=str(username)) |
basket.addObjects(ids,deleteOld=True,username=str(username)) |
|
|
|
logging.debug("uploadatffinallythread uploadfinally done") |
|
|
if RESPONSE is not None: |
if RESPONSE is not None: |
RESPONSE.redirect(self.aq_parent.absolute_url()) |
RESPONSE.redirect(self.aq_parent.absolute_url()) |
|
|
|
|
logging.info("uploadfinally done") |
|
return True |
return True |
|
|
class tmpStore(SimpleItem): |
class tmpStore(SimpleItem): |
Line 591 class CDLIBasketContainer(OrderedFolder)
|
Line 632 class CDLIBasketContainer(OrderedFolder)
|
ret+=str(object[0].getData())+"\n" |
ret+=str(object[0].getData())+"\n" |
elif current=="yes": |
elif current=="yes": |
#search current object |
#search current object |
logging.info("crrent: %s"%object[1].getId().split(".")[0]) |
logging.debug("current: %s"%object[1].getId().split(".")[0]) |
founds=self.CDLICatalog.search({'title':object[1].getId().split(".")[0]}) |
founds=self.CDLICatalog.search({'title':object[1].getId().split(".")[0]}) |
if len(founds)>0: |
if len(founds)>0: |
ret+=str(founds[0].getObject().getLastVersion().getData())+"\n" |
ret+=str(founds[0].getObject().getLastVersion().getData())+"\n" |
Line 1471 class CDLIFileObject(CatalogAware,extVer
|
Line 1512 class CDLIFileObject(CatalogAware,extVer
|
|
|
security.declareProtected('manage','index_html') |
security.declareProtected('manage','index_html') |
|
|
|
security.declarePublic('view') |
|
view = PageTemplateFile('zpt/viewCDLIFile.zpt', globals()) |
|
|
|
security.declarePublic('editATF') |
|
editATF = PageTemplateFile('zpt/editATFFile.zpt', globals()) |
|
|
def PrincipiaSearchSource(self): |
def PrincipiaSearchSource(self): |
"""Return cataloguable key for ourselves.""" |
"""Return cataloguable key for ourselves.""" |
return str(self) |
return str(self) |
Line 1485 class CDLIFileObject(CatalogAware,extVer
|
Line 1532 class CDLIFileObject(CatalogAware,extVer
|
def makeThisVersionCurrent(self,comment,author,RESPONSE=None): |
def makeThisVersionCurrent(self,comment,author,RESPONSE=None): |
"""copy this version to current""" |
"""copy this version to current""" |
parent=self.aq_parent |
parent=self.aq_parent |
newversion=parent.manage_addCDLIFileObject('',comment,author) |
parent.manage_addVersionedFileObject(id=None,vC=comment,author=author,file=self.getData(),RESPONSE=RESPONSE) |
newversion.manage_upload(self.getData()) |
#newversion=parent.manage_addCDLIFileObject('',comment,author) |
|
#newversion.manage_upload(self.getData()) |
|
|
if RESPONSE is not None: |
#if RESPONSE is not None: |
RESPONSE.redirect(self.aq_parent.absolute_url()+'/history') |
# RESPONSE.redirect(self.aq_parent.absolute_url()+'/history') |
|
|
return True |
return True |
|
|
Line 1499 class CDLIFileObject(CatalogAware,extVer
|
Line 1547 class CDLIFileObject(CatalogAware,extVer
|
# return re.sub("\s\#lem"," #lem",data) #remove return vor #lem |
# return re.sub("\s\#lem"," #lem",data) #remove return vor #lem |
return re.sub("#lem"," #lem",data) #remove return vor #lem |
return re.sub("#lem"," #lem",data) #remove return vor #lem |
|
|
security.declarePublic('view') |
|
def view(self): |
|
"""view file""" |
|
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self) |
|
return pt() |
|
|
|
security.declarePublic('getPNumber') |
security.declarePublic('getPNumber') |
def getPNumber(self): |
def getPNumber(self): |
Line 1533 class CDLIFileObject(CatalogAware,extVer
|
Line 1576 class CDLIFileObject(CatalogAware,extVer
|
except: |
except: |
return "ERROR" |
return "ERROR" |
|
|
|
|
manage_addCDLIFileObjectForm=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject', version='1') |
manage_addCDLIFileObjectForm=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject', version='1') |
|
|
def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',versionNumber=0, |
def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',versionNumber=0, |
Line 1565 def manage_addCDLIFileObject(self,id,vC=
|
Line 1609 def manage_addCDLIFileObject(self,id,vC=
|
if content_type: |
if content_type: |
fob.content_type=content_type |
fob.content_type=content_type |
|
|
logging.debug("manage_add: lastversion=%s"%self.getData()) |
#logging.debug("manage_add: lastversion=%s"%self.getData()) |
logging.debug("reindex1: %s in %s"%(repr(self),repr(self.default_catalog))) |
logging.debug("reindex1: %s in %s"%(repr(self),repr(self.default_catalog))) |
self.reindex_object() |
self.reindex_object() |
logging.debug("manage_add: fob_data=%s"%fob.getData()) |
#logging.debug("manage_add: fob_data=%s"%fob.getData()) |
logging.debug("reindex2: %s in %s"%(repr(fob), repr(fob.default_catalog))) |
logging.debug("reindex2: %s in %s"%(repr(fob), repr(fob.default_catalog))) |
fob.index_object() |
fob.index_object() |
|
|
Line 1584 class CDLIFile(extVersionedFile,CatalogA
|
Line 1628 class CDLIFile(extVersionedFile,CatalogA
|
content_meta_type = ["CDLI File Object"] |
content_meta_type = ["CDLI File Object"] |
|
|
default_catalog='CDLICatalog' |
default_catalog='CDLICatalog' |
|
|
security.declareProtected('manage','index_html') |
security.declareProtected('manage','index_html') |
|
|
def getLastVersionData(self): |
def getLastVersionData(self): |
Line 1594 class CDLIFile(extVersionedFile,CatalogA
|
Line 1639 class CDLIFile(extVersionedFile,CatalogA
|
"""get last version data""" |
"""get last version data""" |
return self.getContentObject().getFormattedData() |
return self.getContentObject().getFormattedData() |
|
|
|
def getTextId(self): |
|
"""returns P-number of text""" |
|
# assuming that its the beginning of the title |
|
return self.title[:7] |
|
|
#security.declarePublic('history') |
#security.declarePublic('history') |
def history(self): |
def history(self): |
"""history""" |
"""history""" |
Line 1641 class CDLIFile(extVersionedFile,CatalogA
|
Line 1691 class CDLIFile(extVersionedFile,CatalogA
|
|
|
def _newContentObject(self, id, title='', versionNumber=0, versionComment=None, time=None, author=None): |
def _newContentObject(self, id, title='', versionNumber=0, versionComment=None, time=None, author=None): |
"""factory for content objects. to be overridden in derived classes.""" |
"""factory for content objects. to be overridden in derived classes.""" |
|
logging.debug("_newContentObject(CDLI)") |
return CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=versionComment,time=time,author=author) |
return CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=versionComment,time=time,author=author) |
|
|
|
|
Line 1710 def manage_addCDLIFile(self,id,title,loc
|
Line 1761 def manage_addCDLIFile(self,id,title,loc
|
if RESPONSE is not None: |
if RESPONSE is not None: |
RESPONSE.redirect('manage_main') |
RESPONSE.redirect('manage_main') |
|
|
|
|
def checkUTF8(data): |
def checkUTF8(data): |
"""check utf 8""" |
"""check utf 8""" |
try: |
try: |
Line 1740 def splitatf(fh,dir=None,ext=None):
|
Line 1792 def splitatf(fh,dir=None,ext=None):
|
nf=None |
nf=None |
i=0 |
i=0 |
|
|
|
#ROC: why split \n first and then \r??? |
if (type(fh) is StringType) or (type(fh) is UnicodeType): |
if (type(fh) is StringType) or (type(fh) is UnicodeType): |
iter=fh.split("\n") |
iter=fh.split("\n") |
else: |
else: |
Line 1796 class CDLIFileFolder(extVersionedFileFol
|
Line 1849 class CDLIFileFolder(extVersionedFileFol
|
file_meta_type=['CDLI file'] |
file_meta_type=['CDLI file'] |
folder_meta_type=['CDLI Folder'] |
folder_meta_type=['CDLI Folder'] |
|
|
default_catalog='CDLICatalog' |
file_catalog='CDLICatalog' |
defaultFileCatalog=default_catalog #wenn dieses definiert ist, wird beim hinzufuegen einer neuen version eines files dieser catalog neuindiziert |
|
#downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert. |
#downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert. |
tmpStore2={} |
tmpStore2={} |
|
|
|
def _newVersionedFile(self, id, title='', lockedBy=None, author=None): |
|
"""factory for versioned files. to be overridden in derived classes.""" |
|
logging.debug("_newVersionedFile(CDLI)") |
|
return CDLIFile(id, title, lockedBy=lockedBy, author=author) |
|
|
def setTemp(self,name,value): |
def setTemp(self,name,value): |
"""set tmp""" |
"""set tmp""" |
|
|
setattr(self,name,value) |
setattr(self,name,value) |
|
|
|
deleteFileForm = PageTemplateFile("zpt/doDeleteFile", globals()) |
|
|
def delete(self,ids): |
def delete(self,ids,REQUEST=None): |
"""delete these files""" |
"""delete these files""" |
if type(ids) is not ListType: |
if type(ids) is not ListType: |
ids=[ids] |
ids=[ids] |
|
|
self.manage_delObjects(ids) |
self.manage_delObjects(ids) |
|
|
|
if REQUEST is not None: |
|
return self.index_html() |
|
|
|
|
def getVersionNumbersFromIds(self,ids): |
def getVersionNumbersFromIds(self,ids): |
"""get the numbers of the current versions of documents described by their ids""" |
"""get the numbers of the current versions of documents described by their ids""" |
Line 1834 class CDLIFileFolder(extVersionedFileFol
|
Line 1896 class CDLIFileFolder(extVersionedFileFol
|
logging.debug("getFile: %s"%repr(fn)) |
logging.debug("getFile: %s"%repr(fn)) |
if not self.hasObject(fn): |
if not self.hasObject(fn): |
# search deeper |
# search deeper |
founds=self.CDLICatalog.search({'title':fn}) |
founds=getattr(self, self.file_catalog).search({'textid':fn}) |
if founds: |
if founds: |
obj=founds[0].getObject().getContentObject() |
obj=founds[0].getObject().getContentObject() |
else: |
else: |
Line 1937 class CDLIFileFolder(extVersionedFileFol
|
Line 1999 class CDLIFileFolder(extVersionedFileFol
|
def sortF(x,y): |
def sortF(x,y): |
return cmp(x[0],y[0]) |
return cmp(x[0],y[0]) |
|
|
catalog=getattr(self,self.default_catalog) |
catalog=getattr(self,self.file_catalog) |
#tf,tfilename=mkstemp() |
#tf,tfilename=mkstemp() |
if not hasattr(self.temp_folder,'downloadCounter'): |
if not hasattr(self.temp_folder,'downloadCounter'): |
self.temp_folder.downloadCounter=0 |
self.temp_folder.downloadCounter=0 |
Line 2069 class CDLIRoot(Folder):
|
Line 2131 class CDLIRoot(Folder):
|
|
|
|
|
|
|
def searchText(self, query, index='words'): |
def searchText(self, query, index='graphemes'): |
"""searches query in the fulltext index and returns a list of file ids/P-numbers""" |
"""searches query in the fulltext index and returns a list of file ids/P-numbers""" |
|
# see also: http://www.plope.com/Books/2_7Edition/SearchingZCatalog.stx#2-13 |
|
logging.debug("searchtext for '%s' in index %s"%(query,index)) |
|
#import Products.ZCTextIndex.QueryParser |
|
#qp = QueryParser.QueryParser() |
|
#logging.debug() |
idxQuery = {index:{'query':query}} |
idxQuery = {index:{'query':query}} |
idx = getattr(self, self.file_catalog) |
idx = getattr(self, self.file_catalog) |
results = [] |
|
# do search |
# do search |
resultset = idx.search(idxQuery) |
resultset = idx.search(query_request=idxQuery,sort_index='textid') |
for res in resultset: |
|
# put only the P-Number in the result |
# put only the P-Number in the result |
results.append(res.getId[:7]) |
results = [res.getId[:7] for res in resultset] |
|
logging.debug("searchtext: found %d texts"%len(results)) |
return results |
return results |
|
|
# from PluginINdexes.common.util.py:parseIndexRequest: |
|
# |
def getFile(self, pnum): |
# The class understands the following type of parameters: |
"""get the translit file with the given pnum""" |
# |
f = getattr(self, self.file_catalog).search({'textid':pnum}) |
# - old-style parameters where the query for an index as value inside |
if not f: |
# the request directory where the index name is the name of the key. |
return "" |
# Additional parameters for an index could be passed as index+"_usage" ... |
|
# |
return f[0].getObject().getData() |
# |
|
# - dictionary-style parameters specify a query for an index as |
|
# an entry in the request dictionary where the key corresponds to the |
|
# name of the index and the key is a dictionary with the parameters |
|
# passed to the index. |
|
# |
|
# Allowed keys of the parameter dictionary: |
|
# |
|
# 'query' - contains the query (either string, list or tuple) (required) |
|
# |
|
# other parameters depend on the the index |
|
# |
|
# |
|
# - record-style parameters specify a query for an index as instance of the |
|
# Record class. This happens usually when parameters from a web form use |
|
# the "record" type e.g. <input type="text" name="path.query:record:string">. |
|
# All restrictions of the dictionary-style parameters apply to the record-style |
|
# parameters |
|
|
|
|
|
|
|
Line 2114 class CDLIRoot(Folder):
|
Line 2162 class CDLIRoot(Folder):
|
"""show a file |
"""show a file |
@param fileId: P-Number of the document to be displayed |
@param fileId: P-Number of the document to be displayed |
""" |
""" |
f=self.CDLICatalog({'title':fileId}) |
f=getattr(self, self.file_catalog).search({'textid':fileId}) |
if not f: |
if not f: |
return "" |
return "" |
|
|
Line 2125 class CDLIRoot(Folder):
|
Line 2173 class CDLIRoot(Folder):
|
return f[0].getObject().getLastVersionFormattedData() |
return f[0].getObject().getLastVersionFormattedData() |
|
|
|
|
def showWordInFile(self,fileId,word,lineList=None,regExp=False,indexName=""): |
def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,): |
"""get lines with word from FileId""" |
"""get lines with word from FileId""" |
|
logging.debug("showwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) |
|
|
file=self.showFile(fileId) |
file = formatAtfFullLineNum(self.getFile(fileId)) |
logging.debug("show word regEXP %s"%regExp) |
|
ret=[] |
ret=[] |
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen |
|
wordlist=self.findWordRegExp(indexName,word) |
|
else: |
|
wordlist=[word] |
|
|
|
for line in file.split("\n"): |
# add whitespace before and whitespace and line-end to splitter bounds expressions |
line = formatAtfLineHtml(unicodify(line)) |
bounds = self.splitter[indexName].bounds |
found=False |
splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) |
|
# clean word expression |
|
# TODO: this should use QueryParser itself |
|
# take out double quotes |
|
word = word.replace('"','') |
|
# take out ignorable signs |
|
ignorable = self.splitter[indexName].ignorex |
|
word = ignorable.sub('', word) |
|
# compile into regexp objects and escape parens |
|
wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] |
|
|
|
for line in file.splitlines(): |
for word in wordlist: |
for word in wordlist: |
try: # just a hack because of possible unicode errors in line |
#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignoreable.sub('',line))) |
if line.find(word)>-1: |
if word.search(ignorable.sub('',line)): |
if lineList: #liste of moeglichen Zeilennummern |
line = formatAtfLineHtml(line) |
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile |
|
if num in lineList: |
|
ret.append(line) |
|
else: # nimm alles ohne line check |
|
ret.append(line) |
ret.append(line) |
break; |
break |
except: |
|
pass |
|
return ret |
return ret |
|
|
|
|
def tagWordInFile(self,fileId,word,indexName='words',regExp=False): |
def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False): |
|
""" |
|
get lines with word from all ids in list FileIds. |
|
returns dict with id:lines pairs. |
|
""" |
|
logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) |
|
|
|
return dict([(id,self.showWordInFile(id, word, indexName, regExp)) for id in fileIds]) |
|
|
|
|
|
def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False): |
"""get text with word highlighted from FileId""" |
"""get text with word highlighted from FileId""" |
|
logging.debug("tagwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) |
|
|
file=self.showFile(fileId) |
file=self.getFile(fileId) |
tagStr=u'<span class="found">%s</span>' |
tagStart=u'<span class="found">' |
|
tagEnd=u'</span>' |
|
tagStr=tagStart + u'%%s' + tagEnd |
ret=[] |
ret=[] |
# search using lowercase |
|
word = word.lower() |
|
|
|
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen |
# add whitespace to splitter bounds expressions and compile into regexp object |
wordlist=self.findWordRegExp(indexName,word) |
bounds = self.splitter[indexName].bounds |
else: |
wordsplit = re.compile("(%s|\s)"%bounds) |
# split the search term into words according to the corresponding splitter |
# clean word expression |
#try: |
# TODO: this should use QueryParser itself |
wordlist = self.splitter[indexName].process([word]) |
word = word.replace('"','') # take out double quotes |
#except: |
# take out ignoreable signs |
# wordlist=[word] |
ignorable = self.splitter[indexName].ignorex |
|
word = ignorable.sub('', word) |
for line in file.split("\n"): |
# split search terms by blanks |
line = formatAtfLineHtml(unicodify(line)) |
words = word.split(' ') |
if not line: |
# split search terms again (for grapheme search with words) |
# formatAtf can produce empty lines |
splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words)) |
|
|
|
for line in file.splitlines(): |
|
line = unicodify(line) |
|
# ignore lemma and other lines |
|
if line.lstrip().startswith('#lem:'): |
|
continue |
|
# ignore p-num line |
|
if line.startswith('&P'): |
|
continue |
|
# ignore version lines |
|
if line.startswith('#version'): |
continue |
continue |
|
# ignore atf type lines |
|
if line.startswith('#atf:'): |
|
continue |
|
|
|
# first scan |
|
hitwords = [] |
|
for w in words: |
|
if ignorable.sub('',line).find(w) > -1: |
|
# word is in line |
|
# append split word for grapheme search with words |
|
hitwords.extend(splitwords[w]) |
|
#hitwords.extend(wordsplit.split(w)) |
|
|
|
# examine hits closer |
|
if hitwords: |
|
# split line into words |
|
parts = wordsplit.split(line) |
|
line = "" |
|
for p in parts: |
|
#logging.debug("tagwordinfile: searching for %s in %s"%(p,hitwords)) |
|
# reassemble line |
|
if ignorable.sub('', p) in hitwords: |
|
#logging.debug("tagwordinfile: found %s in %s"%(p,hitwords)) |
|
# this part was found |
|
line += tagStart + formatAtfHtml(p) + tagEnd |
|
else: |
|
line += formatAtfHtml(p) |
|
|
for w in wordlist: |
else: |
if line.lower().find(w)>-1: |
# no hits |
#word ist gefunden dann makiere |
line = formatAtfHtml(line) |
line = line.replace(w,tagStr%w) |
|
|
|
ret.append(line) |
ret.append(line) |
|
|
return u'<br>\n'.join(ret) |
return u'<br>\n'.join(ret) |
|
|
|
|
|
|
|
def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False): |
|
""" |
|
get texts with highlighted word from all ids in list FileIds. |
|
returns dict with id:text pairs. |
|
""" |
|
logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) |
|
return dict([(id,self.tagWordInFile(id, word, indexName, regExp)) for id in fileIds]) |
|
|
|
|
def URLquote(self,str): |
def URLquote(self,str): |
"""quote url""" |
"""quote url""" |
return urllib.quote(str) |
return urllib.quote(str) |
Line 2441 class CDLIRoot(Folder):
|
Line 2550 class CDLIRoot(Folder):
|
|
|
def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None): |
def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None): |
"""import files""" |
"""import files""" |
|
logging.debug("importFiles folderName=%s files=%s ext=%s"%(folderName,files,ext)) |
root=self.cdli_main |
root=self.cdli_main |
count=0 |
count=0 |
if not files: |
if not files: |
Line 2450 class CDLIRoot(Folder):
|
Line 2560 class CDLIRoot(Folder):
|
folder=f[0:3] |
folder=f[0:3] |
f2=f[0:5] |
f2=f[0:5] |
obj=self.ZopeFind(root,obj_ids=[folder]) |
obj=self.ZopeFind(root,obj_ids=[folder]) |
|
logging.debug("importFiles: folder=%s f2=%s obj=%s"%(folder,f2,obj)) |
if ext: |
if ext: |
|
|
ext.result="<p>adding: %s </p>"%f+ext.result |
ext.result="<p>adding: %s </p>"%f+ext.result |
|
|
if not obj: |
if not obj: |
manage_addCDLIFileFolder(root,folder,folder) |
manage_addCDLIFileFolder(root,folder,folder) |
fobj=getattr(root,folder) |
fobj=getattr(root,folder) |
#transaction.get().commit() |
#transaction.get().commit() |
|
|
else: |
else: |
fobj=obj[0][1] |
fobj=obj[0][1] |
|
|
obj2=fobj.ZopeFind(fobj,obj_ids=[f2]) |
obj2=fobj.ZopeFind(fobj,obj_ids=[f2]) |
|
logging.debug("importFiles: fobj=%s obj2=%s"%(fobj,obj2)) |
|
|
if not obj2: |
if not obj2: |
manage_addCDLIFileFolder(fobj,f2,f2) |
manage_addCDLIFileFolder(fobj,f2,f2) |
Line 2471 class CDLIRoot(Folder):
|
Line 2584 class CDLIRoot(Folder):
|
|
|
file2=os.path.join(folderName,f) |
file2=os.path.join(folderName,f) |
id=f |
id=f |
manage_addCDLIFile(fobj2,f,'','') |
logging.debug("importFiles: addCDLIFile fobj2=%s, f=%s file2=%s"%(fobj2,repr(f),repr(file2))) |
id=f |
fobj2.addFile(vC='',file=file(file2),author=author,newName=f) |
ob=fobj2._getOb(f) |
|
ob.title=id |
|
|
|
manage_addCDLIFileObject(ob,id,comment,author,file2,content_type='',from_tmp=True) |
|
self.CDLICatalog.catalog_object(ob) |
|
#self.CDLICatalog.manage_catalogFoundItems(obj_ids=[id],search_sub=1) |
|
#self.CDLICatalog.manage_catalogObject(self.REQUEST, self.REQUEST.RESPONSE, 'CDLICatalog', urlparse.urlparse(ob.absolute_url())[1]) |
|
count+=1 |
count+=1 |
|
|
if count > 1000: |
if count%100==0: |
print "committing" |
logging.debug("importfiles: committing") |
transaction.get().commit() |
transaction.get().commit() |
count=0 |
|
transaction.get().commit() |
transaction.get().commit() |
return "ok" |
return "ok" |
|
|