Annotation of cdli/cdli_helpers.py, revision 1.1
def makelist(mySet):
    """Return the elements of mySet as a new list in ascending sort order."""
    return sorted(mySet)
! 5:
def unicodify(s):
    """Decode a str (UTF-8 or Latin-1 representation) into a unicode object.

    Falsy input (None, empty string, ...) yields the empty unicode string.
    A ``str`` instance is decoded as UTF-8; if its bytes are not valid
    UTF-8 it is decoded as Latin-1 instead. Any other object is returned
    unchanged.

    Note: ``decode`` is called on ``str`` instances, i.e. this relies on
    Python 2 byte-string semantics.
    """
    if not s:
        return u""
    if not isinstance(s, str):
        # already unicode (or not a string at all): hand back untouched
        return s
    try:
        return s.decode('utf-8')
    except UnicodeDecodeError:
        # Only a genuine decoding failure triggers the fallback; other
        # exceptions (KeyboardInterrupt, MemoryError, ...) now propagate.
        # Latin-1 maps every byte value, so this decode cannot fail.
        return s.decode('latin-1')
! 17:
def utf8ify(s):
    """Return s as a byte string in UTF-8 representation.

    Falsy input yields the empty string. ``str`` instances are assumed to
    be UTF-8 already and are returned as they are; everything else is
    encoded via its ``encode('utf-8')`` method.
    """
    if not s:
        return ""
    return s if isinstance(s, str) else s.encode('utf-8')
! 27:
def formatAtfHtml(l):
    """Escape special ATF characters for HTML.

    Replaces ``&``, ``<`` and ``>`` by their HTML character entities.
    Falsy input (None, empty string) yields the empty string.
    """
    if not l:
        return ""

    # The ampersand must be escaped first; otherwise the '&' introduced by
    # the bracket entities below would be escaped a second time.
    l = l.replace('&', '&amp;')
    # replace angular brackets
    l = l.replace('<', '&lt;')
    l = l.replace('>', '&gt;')
    return l
! 39:
def formatAtfLineHtml(l, nolemma=True):
    """Format a single ATF line for HTML.

    Returns the empty string for falsy input and, when ``nolemma`` is
    true, for lemma lines (lines whose first non-blank text is ``#lem:``).
    Every other line is passed through formatAtfHtml().
    """
    if not l:
        return ""

    # lemma lines are dropped unless the caller asked to keep them
    if nolemma and l.lstrip().startswith('#lem:'):
        return ""

    return formatAtfHtml(l)
! 51:
! 52:
! 53:
def formatAtfFullLineNum(txt, nolemma=True):
    """Format full line numbers in ATF text.

    The text is processed line by line (each line is run through
    unicodify() first):

    * A line starting with ``@`` whose first space-separated word is a
      known surface tag sets the current surface to that line with the
      tag replaced by its short label, stripped of surrounding blanks.
    * A line starting with ``@column`` sets the current column to the
      remaining words of the line.
    * A line whose first character is one of ``1``-``9`` is prefixed with
      ``surface:column:``.

    All lines, changed or not, are kept and joined with newlines again.
    Falsy input yields the empty string. The ``nolemma`` parameter is
    accepted but not used by this function.
    """
    if not txt:
        return ""

    # ATF surface tags and the short labels used in the line prefix
    surface_labels = {
        '@obverse': 'obv',
        '@reverse': 'rev',
        '@surface': 'surface',
        '@edge': 'edge',
        '@left': 'left',
        '@right': 'right',
        '@top': 'top',
        '@bottom': 'bottom',
        '@face': 'face',
        '@seal': 'seal',
    }

    current_surface = ""
    current_column = ""
    out_lines = []
    for raw_line in txt.splitlines():
        line = unicodify(raw_line)
        first_char = line[:1]  # empty for an empty line
        if first_char == '@':
            # surface or column marker
            tokens = line.split(' ')
            tag = tokens[0]
            if tag in surface_labels:
                current_surface = line.replace(tag, surface_labels[tag]).strip()
            elif tag == '@column':
                current_column = ' '.join(tokens[1:])
        elif first_char and first_char in '123456789':
            # ordinary text line -> prepend surface and column
            line = "%s:%s:%s" % (current_surface, current_column, line)

        out_lines.append(line)

    return '\n'.join(out_lines)
! 92:
! 93:
def generateXMLReturn(hash):
    """Generate the XML document used as the return value of uploadATFRPC.

    ``hash`` is a mapping with the keys ``'errors'``, ``'changed'`` and
    ``'newPs'``. Each entry of ``'errors'`` and ``'changed'`` is
    interpolated into a template with two ``%s`` placeholders (so
    presumably a 2-tuple of ATF name and text -- confirm against the
    caller); each entry of ``'newPs'`` fills a single placeholder.

    The values are inserted as they are; no XML escaping is applied here.
    """
    pieces = ["<return>"]

    pieces.append("<errors>")
    pieces.extend(['<error atf="%s">%s</error>' % entry
                   for entry in hash['errors']])
    pieces.append("</errors>")

    pieces.append("<changes>")
    pieces.extend(['<change atf="%s">%s</change>' % entry
                   for entry in hash['changed']])
    pieces.append("</changes>")

    pieces.append("<newPs>")
    pieces.extend(['<new atf="%s"/>' % entry
                   for entry in hash['newPs']])
    pieces.append("</newPs>")

    pieces.append("</return>")
    return "".join(pieces)
! 117:
! 118:
def unique(s):
    """Return a list of the elements in s, but without duplicates.

    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
    unique("abcabc") some permutation of ["a", "b", "c"], and
    unique(([1, 2], [2, 3], [1, 2])) some permutation of
    [[2, 3], [1, 2]].

    For best speed, all sequence elements should be hashable.  Then
    unique() will usually work in linear time.

    If not possible, the sequence elements should enjoy a total
    ordering, and if list(s).sort() doesn't raise TypeError it's
    assumed that they do enjoy a total ordering.  Then unique() will
    usually work in O(N*log2(N)) time.

    If that's not possible either, the sequence elements must support
    equality-testing.  Then unique() will usually work in quadratic
    time.
    (from the python cookbook)

    The result is always a real list, never a dict view. ``s`` must
    support len().
    """
    if len(s) == 0:
        return []

    # Try using a dict first, as that's the fastest and will usually
    # work.  If it doesn't work, it will usually fail quickly, so it
    # usually doesn't cost much to *try* it.  It requires that all the
    # sequence elements be hashable, and support equality comparison.
    seen = {}
    try:
        for x in s:
            seen[x] = 1
    except TypeError:
        pass  # unhashable element: move on to the next method
    else:
        # list(...) instead of .keys(): keys() is a view, not a list,
        # on Python 3; on Python 2 both give the same list.
        return list(seen)

    # We can't hash all the elements.  Second fastest is to sort,
    # which brings the equal elements together; then duplicates are
    # easy to weed out in a single pass.
    try:
        t = list(s)
        t.sort()
    except TypeError:
        pass  # elements are not orderable: move on to the next method
    else:
        deduped = [t[0]]  # t is non-empty, the empty case returned above
        for item in t[1:]:
            if item != deduped[-1]:
                deduped.append(item)
        return deduped

    # Brute force is all that's left.
    result = []
    for x in s:
        if x not in result:
            result.append(x)
    return result
! 187:
! 188:
! 189:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>