version 1.80.2.5, 2007/10/26 22:45:12
|
version 1.80.2.6, 2007/11/02 15:37:46
|
Line 53 def utf8ify(s):
|
Line 53 def utf8ify(s):
|
else: |
else: |
return s.encode('utf-8') |
return s.encode('utf-8') |
|
|
def formatAtfLineHtml(l, nolemma=True): |
def formatAtfHtml(l): |
"""escape special ATF characters for HTML""" |
"""escape special ATF characters for HTML""" |
if not l: |
if not l: |
return "" |
return "" |
|
|
if nolemma: |
|
# ignore lemma lines |
|
if l.lstrip().startswith('#lem:'): |
|
return "" |
|
# replace & |
# replace & |
l = l.replace('&','&amp;') |
l = l.replace('&','&amp;') |
# replace angular brackets |
# replace angular brackets |
Line 69 def formatAtfLineHtml(l, nolemma=True):
|
Line 65 def formatAtfLineHtml(l, nolemma=True):
|
l = l.replace('>','&gt;') |
l = l.replace('>','&gt;') |
return l |
return l |
|
|
|
def formatAtfLineHtml(l, nolemma=True):
    """Format a single ATF line for HTML output.

    l       -- one line of ATF text; an empty or None value yields "".
    nolemma -- when true (the default), lemma lines, i.e. lines whose first
               non-blank characters are '#lem:', are suppressed and "" is
               returned for them.

    Every other line is handed to formatAtfHtml(), which escapes the
    characters that are special in HTML, and its result is returned.
    """
    if not l:
        return ""

    # lemma lines are dropped entirely unless the caller asks to keep them
    if nolemma and l.lstrip().startswith('#lem:'):
        return ""

    return formatAtfHtml(l)
|
|
|
|
|
|
|
def formatAtfFullLineNum(txt, nolemma=True):
    """Prefix the numbered lines of an ATF text with surface and column.

    txt     -- complete ATF text, lines separated by "\\n"; an empty or None
               value yields "".
    nolemma -- accepted for symmetry with formatAtfLineHtml() but currently
               not used here: no line is ever removed by this function.

    The text is scanned line by line (each line is first passed through
    unicodify()).  Lines starting with '@' are structural:

    * a known surface tag (see the table below) sets the current surface
      label to the whole line with the tag replaced by its short code,
      e.g. '@obverse' -> 'obv', and clears the current column;
    * '@column N' sets the current column label to N.

    Lines starting with a digit 1-9 are text lines and are rewritten as
    'surface:column:line'.  All lines, structural ones included, are kept,
    and the result is joined with "\\n" again.
    """
    # ATF surface tags and the short codes used in the line prefix
    surfaces = {'@obverse': 'obv',
                '@reverse': 'rev',
                '@surface': 'surface',
                '@edge': 'edge',
                '@left': 'left',
                '@right': 'right',
                '@top': 'top',
                '@bottom': 'bottom',
                '@face': 'face',
                '@seal': 'seal'}

    if not txt:
        return ""

    ret = []
    surf = ""
    col = ""
    for line in txt.split("\n"):
        line = unicodify(line)
        if line and line[0] == '@':
            # structural line: surface or column.
            # Split on any whitespace so that tabs, repeated blanks or a
            # trailing "\r" do not hide the tag or its argument.
            words = line.split()
            tag = words[0]
            if tag in surfaces:
                surf = line.replace(tag, surfaces[tag]).strip()
                # columns belong to a surface; do not carry the previous
                # surface's column over into the new one
                col = ""

            elif tag == '@column':
                # a bare '@column' without a number must not raise
                if len(words) > 1:
                    col = words[1]
                else:
                    col = ""

        elif line and line[0] in '123456789':
            # ordinary text line -> add full line number
            line = "%s:%s:%s" % (surf, col, line)

        ret.append(line)

    return '\n'.join(ret)
|
|
|
|
def generateXMLReturn(hash): |
def generateXMLReturn(hash): |
"""erzeugt das xml file als returnwert fuer uploadATFRPC""" |
"""erzeugt das xml file als returnwert fuer uploadATFRPC""" |
Line 1584 class CDLIFile(extVersionedFile,CatalogA
|
Line 1633 class CDLIFile(extVersionedFile,CatalogA
|
content_meta_type = ["CDLI File Object"] |
content_meta_type = ["CDLI File Object"] |
|
|
default_catalog='CDLICatalog' |
default_catalog='CDLICatalog' |
|
|
security.declareProtected('manage','index_html') |
security.declareProtected('manage','index_html') |
|
|
def getLastVersionData(self): |
def getLastVersionData(self): |
Line 1594 class CDLIFile(extVersionedFile,CatalogA
|
Line 1644 class CDLIFile(extVersionedFile,CatalogA
|
"""get last version data""" |
"""get last version data""" |
return self.getContentObject().getFormattedData() |
return self.getContentObject().getFormattedData() |
|
|
|
def getTextId(self):
    """Return the P-number of this text.

    The P-number is assumed to be the first seven characters of the
    object's title; the title is not checked for actually starting with
    one, and a shorter title is returned unchanged.
    """
    return self.title[:7]
|
|
#security.declarePublic('history') |
#security.declarePublic('history') |
def history(self): |
def history(self): |
"""history""" |
"""history""" |
Line 1796 class CDLIFileFolder(extVersionedFileFol
|
Line 1851 class CDLIFileFolder(extVersionedFileFol
|
file_meta_type=['CDLI file'] |
file_meta_type=['CDLI file'] |
folder_meta_type=['CDLI Folder'] |
folder_meta_type=['CDLI Folder'] |
|
|
default_catalog='CDLICatalog' |
file_catalog='CDLICatalog' |
defaultFileCatalog=default_catalog #wenn dieses definiert ist, wird beim hinzufuegen einer neuen version eines files dieser catalog neuindiziert |
|
#downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert. |
#downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert. |
tmpStore2={} |
tmpStore2={} |
|
|
Line 1834 class CDLIFileFolder(extVersionedFileFol
|
Line 1889 class CDLIFileFolder(extVersionedFileFol
|
logging.debug("getFile: %s"%repr(fn)) |
logging.debug("getFile: %s"%repr(fn)) |
if not self.hasObject(fn): |
if not self.hasObject(fn): |
# search deeper |
# search deeper |
founds=self.CDLICatalog.search({'title':fn}) |
founds=getattr(self, self.file_catalog).search({'textid':fn}) |
if founds: |
if founds: |
obj=founds[0].getObject().getContentObject() |
obj=founds[0].getObject().getContentObject() |
else: |
else: |
Line 1937 class CDLIFileFolder(extVersionedFileFol
|
Line 1992 class CDLIFileFolder(extVersionedFileFol
|
def sortF(x,y): |
def sortF(x,y): |
return cmp(x[0],y[0]) |
return cmp(x[0],y[0]) |
|
|
catalog=getattr(self,self.default_catalog) |
catalog=getattr(self,self.file_catalog) |
#tf,tfilename=mkstemp() |
#tf,tfilename=mkstemp() |
if not hasattr(self.temp_folder,'downloadCounter'): |
if not hasattr(self.temp_folder,'downloadCounter'): |
self.temp_folder.downloadCounter=0 |
self.temp_folder.downloadCounter=0 |
Line 2069 class CDLIRoot(Folder):
|
Line 2124 class CDLIRoot(Folder):
|
|
|
|
|
|
|
def searchText(self, query, index='words'): |
def searchText(self, query, index='graphemes'): |
"""searches query in the fulltext index and returns a list of file ids/P-numbers""" |
"""searches query in the fulltext index and returns a list of file ids/P-numbers""" |
|
# see also: http://www.plope.com/Books/2_7Edition/SearchingZCatalog.stx#2-13 |
idxQuery = {index:{'query':query}} |
idxQuery = {index:{'query':query}} |
idx = getattr(self, self.file_catalog) |
idx = getattr(self, self.file_catalog) |
results = [] |
|
# do search |
# do search |
resultset = idx.search(idxQuery) |
resultset = idx.search(query_request=idxQuery,sort_index='textid') |
for res in resultset: |
|
# put only the P-Number in the result |
# put only the P-Number in the result |
results.append(res.getId[:7]) |
results = [res.getId[:7] for res in resultset] |
return results |
return results |
|
|
# from PluginINdexes.common.util.py:parseIndexRequest: |
|
# |
def getFile(self, pnum): |
# The class understands the following type of parameters: |
"""get the translit file with the given pnum""" |
# |
f = getattr(self, self.file_catalog).search({'textid':pnum}) |
# - old-style parameters where the query for an index as value inside |
if not f: |
# the request directory where the index name is the name of the key. |
return "" |
# Additional parameters for an index could be passed as index+"_usage" ... |
|
# |
return f[0].getObject().getData() |
# |
|
# - dictionary-style parameters specify a query for an index as |
|
# an entry in the request dictionary where the key corresponds to the |
|
# name of the index and the key is a dictionary with the parameters |
|
# passed to the index. |
|
# |
|
# Allowed keys of the parameter dictionary: |
|
# |
|
# 'query' - contains the query (either string, list or tuple) (required) |
|
# |
|
# other parameters depend on the the index |
|
# |
|
# |
|
# - record-style parameters specify a query for an index as instance of the |
|
# Record class. This happens usually when parameters from a web form use |
|
# the "record" type e.g. <input type="text" name="path.query:record:string">. |
|
# All restrictions of the dictionary-style parameters apply to the record-style |
|
# parameters |
|
|
|
|
|
|
|
Line 2114 class CDLIRoot(Folder):
|
Line 2150 class CDLIRoot(Folder):
|
"""show a file |
"""show a file |
@param fileId: P-Number of the document to be displayed |
@param fileId: P-Number of the document to be displayed |
""" |
""" |
f=self.CDLICatalog({'title':fileId}) |
f=getattr(self, self.file_catalog).search({'textid':fileId}) |
if not f: |
if not f: |
return "" |
return "" |
|
|
Line 2128 class CDLIRoot(Folder):
|
Line 2164 class CDLIRoot(Folder):
|
def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,): |
def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,): |
"""get lines with word from FileId""" |
"""get lines with word from FileId""" |
|
|
file=self.showFile(fileId) |
file = formatAtfFullLineNum(self.getFile(fileId)) |
ret=[] |
ret=[] |
# search using lowercase |
|
word = word.lower() |
# add whitespace before and whitespace and line-end to splitter bounds expressions |
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen |
bounds = self.splitter[indexName].bounds |
wordlist=self.findWordRegExp(indexName,word) |
splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) |
else: |
# compile into regexp objects |
# split the search term into words according to the corresponding splitter |
wordlist = [re.compile(splitexp%w,re.IGNORECASE) for w in word.split(' ')] |
#try: |
|
wordlist = self.splitter[indexName].process([word]) |
|
#except: |
|
# wordlist=[word] |
|
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
line = formatAtfLineHtml(unicodify(line)) |
|
if not line: |
|
# formatAtf can produce empty lines |
|
continue |
|
for word in wordlist: |
for word in wordlist: |
if line.lower().find(word)>-1: |
#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,line)) |
|
if word.search(line): |
|
line = formatAtfLineHtml(line) |
ret.append(line) |
ret.append(line) |
|
break |
|
|
return ret |
return ret |
|
|
|
|
def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False): |
def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False): |
"""get text with word highlighted from FileId""" |
"""get text with word highlighted from FileId""" |
|
|
file=self.showFile(fileId) |
file=self.getFile(fileId) |
tagStr=u'<span class="found">%s</span>' |
tagStart=u'<span class="found">' |
|
tagEnd=u'</span>' |
|
tagStr=tagStart + u'%%s' + tagEnd |
ret=[] |
ret=[] |
# search using lowercase |
|
word = word.lower() |
|
|
|
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen |
# add whitespace to splitter bounds expressions and compile into regexp object |
wordlist=self.findWordRegExp(indexName,word) |
bounds = self.splitter[indexName].bounds |
else: |
wordsplit = re.compile("(%s|\s)"%bounds) |
# split the search term into words according to the corresponding splitter |
# split search terms by blanks |
#try: |
words = word.split(' ') |
wordlist = self.splitter[indexName].process([word]) |
|
#except: |
|
# wordlist=[word] |
|
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
line = formatAtfLineHtml(unicodify(line)) |
line = unicodify(line) |
if not line: |
# ignore lemma lines |
# formatAtf can produce empty lines |
if line.lstrip().startswith('#lem:'): |
continue |
continue |
|
|
for w in wordlist: |
# first scan |
if line.lower().find(w)>-1: |
hitwords = [] |
#word ist gefunden dann makiere |
for w in words: |
line = line.replace(w,tagStr%w) |
if line.find(w) > -1: |
|
# word is in line |
|
hitwords.append(w) |
|
|
|
# examine hits closer |
|
if hitwords: |
|
# split line into words |
|
parts = wordsplit.split(line) |
|
line = "" |
|
for p in parts: |
|
# reassemble line |
|
if p in hitwords: |
|
# this part was found |
|
line += tagStart + formatAtfHtml(p) + tagEnd |
|
else: |
|
line += formatAtfHtml(p) |
|
|
|
else: |
|
# no hits |
|
line = formatAtfHtml(line) |
|
|
ret.append(line) |
ret.append(line) |
|
|
return u'<br>\n'.join(ret) |
return u'<br>\n'.join(ret) |
|
|
|
|
def URLquote(self,str): |
def URLquote(self,str): |
"""quote url""" |
"""quote url""" |
return urllib.quote(str) |
return urllib.quote(str) |