Patch summary (text before the first '---' header is ignored by patch tools):
  * iterate over text with splitlines() instead of split("\n") in three loops
  * demote the basket "current" log message to debug level and fix its typo
  * skip '&P', '#version' and '#atf:' lines in addition to '#lem:' lines
  * add a review question (ROC) in splitatf
NOTE(review): the last hunk originally added `file.linesplit()`, which is not a
string method and would raise AttributeError; it is corrected to `splitlines()`.
NOTE(review): leading indentation and the position of blank context lines were
lost in the source this was recovered from and are reconstructed here from the
hunk line counts -- confirm against the target file before applying.

--- cdli/cdli_files.py	2008/01/07 16:54:46	1.80.2.14
+++ cdli/cdli_files.py	2008/01/14 18:43:21	1.80.2.15
@@ -99,7 +99,7 @@ def formatAtfFullLineNum(txt, nolemma=Tr
     ret = []
     surf = ""
     col = ""
-    for line in txt.split("\n"):
+    for line in txt.splitlines():
         line = unicodify(line)
         if line and line[0] == '@':
             # surface or column
@@ -632,7 +632,7 @@ class CDLIBasketContainer(OrderedFolder)
                     ret+=str(object[0].getData())+"\n"
                 elif current=="yes":
                     #search current object
-                    logging.info("crrent: %s"%object[1].getId().split(".")[0])
+                    logging.debug("current: %s"%object[1].getId().split(".")[0])
                     founds=self.CDLICatalog.search({'title':object[1].getId().split(".")[0]})
                     if len(founds)>0:
                         ret+=str(founds[0].getObject().getLastVersion().getData())+"\n"
@@ -1791,6 +1791,7 @@ def splitatf(fh,dir=None,ext=None):
     nf=None
     i=0
 
+    #ROC: why split \n first and then \r???
     if (type(fh) is StringType) or (type(fh) is UnicodeType):
         iter=fh.split("\n")
     else:
@@ -2191,7 +2192,7 @@ class CDLIRoot(Folder):
         # compile into regexp objects and escape parens
         wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')]
 
-        for line in file.split("\n"):
+        for line in file.splitlines():
             for word in wordlist:
                 #logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignoreable.sub('',line)))
                 if word.search(ignorable.sub('',line)):
@@ -2236,11 +2237,20 @@ class CDLIRoot(Folder):
         # split search terms again (for grapheme search with words)
         splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words))
 
-        for line in file.split("\n"):
+        for line in file.splitlines():
             line = unicodify(line)
-            # ignore lemma lines
+            # ignore lemma and other lines
             if line.lstrip().startswith('#lem:'):
                 continue
+            # ignore p-num line
+            if line.startswith('&P'):
+                continue
+            # ignore version lines
+            if line.startswith('#version'):
+                continue
+            # ignore atf type lines
+            if line.startswith('#atf:'):
+                continue
 
             # first scan
             hitwords = []