cdli/cdli_helpers.py - view

File: [Repository] / cdli / cdli_helpers.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Fri May 15 13:13:31 2009 UTC (15 years, 1 month ago) by dwinter
Branches: MAIN
CVS tags: HEAD

arbeit an admin funktionen

from xml.sax.saxutils import escape


# Abbreviations substituted for ATF surface markers when building full line numbers.
_SURFACE_CODES = {
    '@obverse': 'obv',
    '@reverse': 'rev',
    '@surface': 'surface',
    '@edge': 'edge',
    '@left': 'left',
    '@right': 'right',
    '@top': 'top',
    '@bottom': 'bottom',
    '@face': 'face',
    '@seal': 'seal',
}


def makelist(mySet):
    """Return the elements of mySet as a new list in ascending order."""
    return sorted(mySet)


def unicodify(s):
    """Decode a byte string (UTF-8 or Latin-1 representation) into a unicode object.

    Any falsy input (None, empty string, ...) yields u"". Byte strings are
    decoded as UTF-8; if that fails they are decoded as Latin-1, which accepts
    every byte sequence. Everything else is returned unchanged.
    """
    if not s:
        return u""
    # `bytes` is the same type as `str` on Python 2.6+, so this test behaves
    # as before there and also does the right thing on Python 3.
    if isinstance(s, bytes):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            return s.decode('latin-1')
    return s


def utf8ify(s):
    """Encode a unicode object into a UTF-8 byte string.

    Byte strings are assumed to be UTF-8 already and are returned unchanged.
    Any falsy input yields an empty byte string.
    """
    if not s:
        return b""
    if isinstance(s, bytes):
        return s
    return s.encode('utf-8')


def formatAtfHtml(l):
    """Escape the characters &, < and > in an ATF string for use in HTML.

    Falsy input yields "".
    """
    if not l:
        return ""

    # '&' has to be handled first, otherwise the ampersands of the entities
    # produced for the angle brackets would be escaped a second time.
    l = l.replace('&', '&amp;')
    l = l.replace('<', '&lt;')
    l = l.replace('>', '&gt;')
    return l


def formatAtfLineHtml(l, nolemma=True):
    """Format a single ATF line for HTML.

    With nolemma set (the default), lemma lines - lines whose first
    non-blank text is '#lem:' - are dropped and "" is returned for them.
    All other lines are HTML-escaped via formatAtfHtml().
    """
    if not l:
        return ""

    if nolemma and l.lstrip().startswith('#lem:'):
        return ""

    return formatAtfHtml(l)


def formatAtfFullLineNum(txt, nolemma=True):
    """Prefix the numbered lines of an ATF text with their surface and column.

    Lines starting with a known surface marker (e.g. '@obverse') or with
    '@column' update the current surface/column. Lines starting with a digit
    1-9 are rewritten as "surface:column:line". Every input line, rewritten
    or not, is kept in the output. The column is not reset when the surface
    changes.

    nolemma is accepted for interface compatibility and is not used.
    Falsy input yields "".
    """
    if not txt:
        return ""

    ret = []
    surf = ""
    col = ""
    for line in txt.splitlines():
        line = unicodify(line)
        if line.startswith('@'):
            # surface or column marker
            words = line.split(' ')
            marker = words[0]
            if marker in _SURFACE_CODES:
                surf = line.replace(marker, _SURFACE_CODES[marker]).strip()
            elif marker == '@column':
                col = ' '.join(words[1:])
        elif line and line[0] in '123456789':
            # ordinary text line -> prepend the full line number
            line = "%s:%s:%s" % (surf, col, line)

        ret.append(line)

    return '\n'.join(ret)


def _xml_escape(value):
    """Format value with %s and escape &, <, > and double quotes for XML."""
    return escape("%s" % (value,), {'"': '&quot;'})


def generateXMLReturn(hash):
    """Build the XML string used as the return value for uploadATFRPC.

    hash must provide three keys:
      'errors'  - iterable of (atf, message) pairs
      'changed' - iterable of (atf, message) pairs
      'newPs'   - iterable of single values

    All interpolated values are XML-escaped so that characters such as
    &, < or " cannot break the generated markup.
    """
    parts = ["<return>"]

    parts.append("<errors>")
    for atf, message in hash['errors']:
        parts.append('<error atf="%s">%s</error>'
                     % (_xml_escape(atf), _xml_escape(message)))
    parts.append("</errors>")

    parts.append("<changes>")
    for atf, message in hash['changed']:
        parts.append('<change atf="%s">%s</change>'
                     % (_xml_escape(atf), _xml_escape(message)))
    parts.append("</changes>")

    parts.append("<newPs>")
    for new in hash['newPs']:
        if isinstance(new, tuple):
            # plain "%s" % new formatting also accepts a 1-tuple; keep that
            (new,) = new
        parts.append('<new atf="%s"/>' % _xml_escape(new))
    parts.append("</newPs>")

    parts.append("</return>")
    return "".join(parts)


def unique(s):
    """Return a list of the elements in s, but without duplicates.

    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
    unique("abcabc") some permutation of ["a", "b", "c"], and
    unique(([1, 2], [2, 3], [1, 2])) some permutation of
    [[2, 3], [1, 2]].

    For best speed, all sequence elements should be hashable. Then
    unique() will usually work in linear time.

    If not possible, the sequence elements should enjoy a total
    ordering, and if sorting them doesn't raise TypeError it's
    assumed that they do enjoy a total ordering. Then unique() will
    usually work in O(N*log2(N)) time.

    If that's not possible either, the sequence elements must support
    equality-testing. Then unique() will usually work in quadratic
    time.
    (from the python cookbook)
    """
    if len(s) == 0:
        return []

    # Try using a dict first, as that's the fastest and will usually
    # work. If it doesn't work, it will usually fail quickly, so it
    # usually doesn't cost much to *try* it. It requires that all the
    # sequence elements be hashable, and support equality comparison.
    seen = {}
    try:
        for x in s:
            seen[x] = 1
    except TypeError:
        pass  # move on to the next method
    else:
        # list() so that a real list is returned on Python 3 as well
        return list(seen)

    # We can't hash all the elements. Second fastest is to sort,
    # which brings the equal elements together; then duplicates are
    # easy to weed out in a single pass.
    try:
        ordered = sorted(s)
    except TypeError:
        pass  # move on to the next method
    else:
        result = ordered[:1]
        for x in ordered[1:]:
            if x != result[-1]:
                result.append(x)
        return result

    # Brute force is all that's left.
    result = []
    for x in s:
        if x not in result:
            result.append(x)
    return result