File:  [Repository] / cdli / cdli_helpers.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Fri May 15 13:13:31 2009 UTC (15 years, 1 month ago) by dwinter
Branches: MAIN
CVS tags: HEAD
arbeit an admin funktionen

    1: def makelist(mySet):
    2:         x = list(mySet)
    3:         x.sort()
    4:         return x
    5:     
    6: def unicodify(s):
    7:     """decode str (utf-8 or latin-1 representation) into unicode object"""
    8:     if not s:
    9:         return u""
   10:     if isinstance(s, str):
   11:         try:
   12:             return s.decode('utf-8')
   13:         except:
   14:             return s.decode('latin-1')
   15:     else:
   16:         return s
   17: 
   18: def utf8ify(s):
   19:     """encode unicode object or string into byte string in utf-8 representation.
   20:        assumes string objects to be utf-8"""
   21:     if not s:
   22:         return ""
   23:     if isinstance(s, str):
   24:         return s
   25:     else:
   26:         return s.encode('utf-8')
   27: 
   28: def formatAtfHtml(l):
   29:     """escape special ATF characters for HTML"""
   30:     if not l:
   31:         return ""
   32: 
   33:     # replace &
   34:     l = l.replace('&','&')
   35:     # replace angular brackets
   36:     l = l.replace('<','&lt;')
   37:     l = l.replace('>','&gt;')
   38:     return l
   39: 
   40: def formatAtfLineHtml(l, nolemma=True):
   41:     """format ATF line for HTML"""
   42:     if not l:
   43:         return ""
   44: 
   45:     if nolemma:
   46:         # ignore lemma lines
   47:         if l.lstrip().startswith('#lem:'):
   48:             return ""
   49:     
   50:     return formatAtfHtml(l)
   51: 
   52: 
   53: 
   54: def formatAtfFullLineNum(txt, nolemma=True):
   55:     """format full line numbers in ATF text"""
   56:     # surface codes
   57:     surfaces = {'@obverse':'obv',
   58:                 '@reverse':'rev',
   59:                 '@surface':'surface',
   60:                 '@edge':'edge',
   61:                 '@left':'left',
   62:                 '@right':'right',
   63:                 '@top':'top',
   64:                 '@bottom':'bottom',
   65:                 '@face':'face',
   66:                 '@seal':'seal'}
   67: 
   68:     if not txt:
   69:         return ""
   70:     
   71:     ret = []
   72:     surf = ""
   73:     col = ""
   74:     for line in txt.splitlines():
   75:         line = unicodify(line)
   76:         if line and line[0] == '@':
   77:             # surface or column
   78:             words = line.split(' ')
   79:             if words[0] in surfaces:
   80:                 surf = line.replace(words[0],surfaces[words[0]]).strip()
   81:             
   82:             elif words[0] == '@column':
   83:                 col = ' '.join(words[1:])
   84:             
   85:         elif line and line[0] in '123456789':
   86:             # ordinary line -> add line number
   87:             line = "%s:%s:%s"%(surf,col,line)
   88:             
   89:         ret.append(line)
   90:     
   91:     return '\n'.join(ret)
   92:             
   93:             
   94: def generateXMLReturn(hash):
   95:     """erzeugt das xml file als returnwert fuer uploadATFRPC"""
   96: 
   97:     ret="<return>"
   98:     
   99:     ret+="<errors>"
  100:     for error in hash['errors']:
  101:         ret+="""<error atf="%s">%s</error>"""%error
  102:     
  103:     ret+="</errors>"
  104:     
  105:     ret+="<changes>"
  106:     for changed in hash['changed']:
  107:         ret+="""<change atf="%s">%s</change>"""%changed
  108:     ret+="</changes>"
  109:     
  110:     ret+="<newPs>"
  111:     for new in hash['newPs']:
  112:         ret+="""<new atf="%s"/>"""%new
  113:     ret+="</newPs>"
  114:     
  115:     ret+="</return>"
  116:     return ret
  117:     
  118:     
  119: def unique(s):
  120:     """Return a list of the elements in s, but without duplicates.
  121: 
  122:     For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
  123:     unique("abcabc") some permutation of ["a", "b", "c"], and
  124:     unique(([1, 2], [2, 3], [1, 2])) some permutation of
  125:     [[2, 3], [1, 2]].
  126: 
  127:     For best speed, all sequence elements should be hashable.  Then
  128:     unique() will usually work in linear time.
  129: 
  130:     If not possible, the sequence elements should enjoy a total
  131:     ordering, and if list(s).sort() doesn't raise TypeError it's
  132:     assumed that they do enjoy a total ordering.  Then unique() will
  133:     usually work in O(N*log2(N)) time.
  134: 
  135:     If that's not possible either, the sequence elements must support
  136:     equality-testing.  Then unique() will usually work in quadratic
  137:     time.
  138:     (from the python cookbook)
  139:     """
  140: 
  141:     n = len(s)
  142:     if n == 0:
  143:         return []
  144: 
  145:     # Try using a dict first, as that's the fastest and will usually
  146:     # work.  If it doesn't work, it will usually fail quickly, so it
  147:     # usually doesn't cost much to *try* it.  It requires that all the
  148:     # sequence elements be hashable, and support equality comparison.
  149:     u = {}
  150:     try:
  151:         for x in s:
  152:             u[x] = 1
  153:     except TypeError:
  154:         del u  # move on to the next method
  155:     else:
  156:         return u.keys()
  157: 
  158:     # We can't hash all the elements.  Second fastest is to sort,
  159:     # which brings the equal elements together; then duplicates are
  160:     # easy to weed out in a single pass.
  161:     # NOTE:  Python's list.sort() was designed to be efficient in the
  162:     # presence of many duplicate elements.  This isn't true of all
  163:     # sort functions in all languages or libraries, so this approach
  164:     # is more effective in Python than it may be elsewhere.
  165:     try:
  166:         t = list(s)
  167:         t.sort()
  168:     except TypeError:
  169:         del t  # move on to the next method
  170:     else:
  171:         assert n > 0
  172:         last = t[0]
  173:         lasti = i = 1
  174:         while i < n:
  175:             if t[i] != last:
  176:                 t[lasti] = last = t[i]
  177:                 lasti += 1
  178:             i += 1
  179:         return t[:lasti]
  180: 
  181:     # Brute force is all that's left.
  182:     u = []
  183:     for x in s:
  184:         if x not in u:
  185:             u.append(x)
  186:     return u
  187: 
  188: 
  189:  

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>