File:  [Repository] / kupu / python / spellcheck.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Thu Sep 15 13:06:00 2005 UTC (18 years, 9 months ago) by dwinter
CVS tags: MAIN, HEAD
Initial revision

    1: #!/usr/bin/python
    2: 
    3: """SpellChecker for Kupu"""
    4: 
    5: COMMAND = 'aspell -a'
    6: 
    7: import popen2, re
    8: 
    9: try:
   10:     from Globals import ClassSecurityInfo
   11: except ImportError:
   12:     pass
   13: else:
   14:     # hmmm... Zope 2...
   15:     __allow_access_to_unprotected_subobjects__ = 1
   16: 
   17: class SpellChecker:
   18:     """Simple spell checker, uses ispell (or aspell) with pipes"""
   19: 
   20:     __allow_access_to_unprotected_subobjects__ = 1
   21: 
   22:     reg_unknown = re.compile('^& (.*?) \d* \d*: (.*)$', re.U)
   23:     reg_unknown_no_replacement = re.compile('^\# (.*?) \d*.*$', re.U)
   24: 
   25:     def __init__(self):
   26:         self.chout, self.chin = popen2.popen2(COMMAND)
   27:         # throw away intro
   28:         self.read_line()
   29: 
   30:     def __del__(self):
   31:         self.chout.close()
   32:         self.chin.close()
   33: 
   34:     def check(self, text):
   35:         """checks a line of text
   36:         
   37:             returns None if spelling was okay, and an HTML string with words 
   38:             that weren't recognized marked (with a span class="wrong_spelling")
   39:         """
   40:         result = {}
   41:         for line in text.split('\n'):
   42:             line = line.strip()
   43:             if line:
   44:                 self.write_line(line)
   45:             while 1:
   46:                 resline = self.read_line()
   47:                 if not resline.strip():
   48:                     break
   49:                 if resline.strip() != '*':
   50:                     match = self.reg_unknown.match(resline)
   51:                     have_replacement = True
   52:                     if not match:
   53:                         match = self.reg_unknown_no_replacement.match(resline)
   54:                         have_replacement = False
   55:                     assert match, 'Unknown formatted line: %s' % resline
   56:                     word = match.group(1)
   57:                     if result.has_key(word):
   58:                         continue
   59:                     replacements = []
   60:                     if have_replacement:
   61:                         replacements = match.group(2).split(', ')
   62:                     result[word] = replacements
   63:         return result
   64: 
   65:     def read_line(self):
   66:         buf = []
   67:         while 1:
   68:             char = self.read_char()
   69:             if char == '\n':
   70:                 return ''.join(buf)
   71:             buf.append(char)
   72: 
   73:     def write_line(self, line):
   74:         try:
   75:             self.chin.write('%s\n' % line)
   76:             self.chin.flush()
   77:             return
   78:         except IOError:
   79:             self.reconnect()
   80:             self.chin.write('%s\n' % line)
   81:             self.chin.flush()
   82:             return
   83:         raise
   84: 
   85:     def read_char(self):
   86:         try:
   87:             return self.chout.read(1)
   88:         except IOError:
   89:             self.reconnect()
   90:             return self.chout.read(1)
   91:         raise
   92: 
   93:     def reconnect(self):
   94:         try:
   95:             self.chout.close()
   96:         except IOError:
   97:             pass
   98:         try:
   99:             self.chin.close()
  100:         except IOError:
  101:             pass
  102:         self.chout, self.chin = popen2.popen2(COMMAND)
  103: 
  104: def format_result(result):
  105:     """convert the result dict to XML"""
  106:     buf = ['<?xml version="1.0" encoding="UTF-8" ?>\n<spellcheck_result>']
  107:     for key, value in result.items():
  108:         buf.append('<incorrect><word>')
  109:         buf.append(key)
  110:         buf.append('</word><replacements>')
  111:         buf.append(' '.join(value))
  112:         buf.append('</replacements></incorrect>')
  113:     buf.append('</spellcheck_result>')
  114:     return ''.join(buf)
  115: 
  116: if __name__ == '__main__':
  117:     c = SpellChecker()
  118:     while 1:
  119:         line = raw_input('Enter text to check: ')
  120:         if line == 'q':
  121:             break
  122:         ret = c.check(line)
  123:         if ret is None:
  124:             print 'okay'
  125:         else:
  126:             print ret

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>