version 1.80.2.14, 2008/01/07 16:54:46
|
version 1.80.2.15, 2008/01/14 18:43:21
|
Line 99 def formatAtfFullLineNum(txt, nolemma=Tr
|
Line 99 def formatAtfFullLineNum(txt, nolemma=Tr
|
ret = [] |
ret = [] |
surf = "" |
surf = "" |
col = "" |
col = "" |
for line in txt.split("\n"): |
for line in txt.splitlines(): |
line = unicodify(line) |
line = unicodify(line) |
if line and line[0] == '@': |
if line and line[0] == '@': |
# surface or column |
# surface or column |
Line 632 class CDLIBasketContainer(OrderedFolder)
|
Line 632 class CDLIBasketContainer(OrderedFolder)
|
ret+=str(object[0].getData())+"\n" |
ret+=str(object[0].getData())+"\n" |
elif current=="yes": |
elif current=="yes": |
#search current object |
#search current object |
logging.info("crrent: %s"%object[1].getId().split(".")[0]) |
logging.debug("current: %s"%object[1].getId().split(".")[0]) |
founds=self.CDLICatalog.search({'title':object[1].getId().split(".")[0]}) |
founds=self.CDLICatalog.search({'title':object[1].getId().split(".")[0]}) |
if len(founds)>0: |
if len(founds)>0: |
ret+=str(founds[0].getObject().getLastVersion().getData())+"\n" |
ret+=str(founds[0].getObject().getLastVersion().getData())+"\n" |
Line 1791 def splitatf(fh,dir=None,ext=None):
|
Line 1791 def splitatf(fh,dir=None,ext=None):
|
nf=None |
nf=None |
i=0 |
i=0 |
|
|
|
#ROC: why split \n first and then \r??? |
if (type(fh) is StringType) or (type(fh) is UnicodeType): |
if (type(fh) is StringType) or (type(fh) is UnicodeType): |
iter=fh.split("\n") |
iter=fh.split("\n") |
else: |
else: |
Line 2191 class CDLIRoot(Folder):
|
Line 2192 class CDLIRoot(Folder):
|
# compile into regexp objects and escape parens |
# compile into regexp objects and escape parens |
wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] |
wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] |
|
|
for line in file.split("\n"): |
for line in file.splitlines(): |
for word in wordlist: |
for word in wordlist: |
#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignoreable.sub('',line))) |
#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignoreable.sub('',line))) |
if word.search(ignorable.sub('',line)): |
if word.search(ignorable.sub('',line)): |
Line 2236 class CDLIRoot(Folder):
|
Line 2237 class CDLIRoot(Folder):
|
# split search terms again (for grapheme search with words) |
# split search terms again (for grapheme search with words) |
splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words)) |
splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words)) |
|
|
for line in file.split("\n"): |
for line in file.splitlines(): |
line = unicodify(line) |
line = unicodify(line) |
# ignore lemma lines |
# ignore lemma and other lines |
if line.lstrip().startswith('#lem:'): |
if line.lstrip().startswith('#lem:'): |
continue |
continue |
|
# ignore p-num line |
|
if line.startswith('&P'): |
|
continue |
|
# ignore version lines |
|
if line.startswith('#version'): |
|
continue |
|
# ignore atf type lines |
|
if line.startswith('#atf:'): |
|
continue |
|
|
# first scan |
# first scan |
hitwords = [] |
hitwords = [] |