--- ECHO_content/authorSplitter.py 2006/02/03 20:41:29 1.1 +++ ECHO_content/authorSplitter.py 2010/02/15 19:03:28 1.3 @@ -43,7 +43,8 @@ class authorSplitter: splitted = s.split(";") for w in splitted: - result.append(w.lstrip().rstrip()) + if not (w[0:2]=="!!"): #don't index !!NOT USED.... + result.append(w.lstrip().rstrip()) return result @@ -55,7 +56,7 @@ except: pass if __name__ == '__main__': - a = 'abc def我们的很 好。' + a = 'abc def\U00CE\U00D2\U00D3\U00C7\U00B5\U00C4\U00DC\U00C3\U00A1\U00A3' u = unicode(a, 'gbk') s = authorSplitter() print s.process([u])