cdli/cdli_helpers.py - view

File: [Repository] / cdli / cdli_helpers.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Fri May 15 13:13:31 2009 UTC (14 years, 11 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD

arbeit an admin funktionen

# Type of a byte string on the running interpreter: ``str`` on Python 2,
# ``bytes`` on Python 3.  Derived at runtime so that no version-specific
# name or literal syntax is required.
_BYTES_TYPE = type(u"".encode("ascii"))

# Maps ATF surface markers to the short codes used in full line numbers.
_ATF_SURFACES = {
    '@obverse': 'obv',
    '@reverse': 'rev',
    '@surface': 'surface',
    '@edge': 'edge',
    '@left': 'left',
    '@right': 'right',
    '@top': 'top',
    '@bottom': 'bottom',
    '@face': 'face',
    '@seal': 'seal',
}


def makelist(mySet):
    """Return the elements of ``mySet`` as a new sorted list."""
    return sorted(mySet)


def unicodify(s):
    """Decode a byte string (UTF-8 or Latin-1) into a unicode object.

    Falsy input yields the empty unicode string.  Byte strings are decoded
    as UTF-8 first; if that fails they are decoded as Latin-1, which accepts
    any byte sequence.  Anything that is not a byte string is returned
    unchanged.
    """
    if not s:
        return u""
    if isinstance(s, _BYTES_TYPE):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # not valid UTF-8 -> fall back to Latin-1
            return s.decode('latin-1')
    return s


def utf8ify(s):
    """Encode a unicode object into a UTF-8 byte string.

    Falsy input yields the empty byte string.  Input that already is a byte
    string is assumed to be UTF-8 and is returned unchanged.
    """
    if not s:
        return _BYTES_TYPE()
    if isinstance(s, _BYTES_TYPE):
        return s
    return s.encode('utf-8')


def formatAtfHtml(l):
    """Escape the characters ``&``, ``<`` and ``>`` in ``l`` for HTML.

    Falsy input yields the empty string.
    """
    if not l:
        return ""
    # The ampersand must be replaced first, otherwise the entities produced
    # for the angle brackets would be escaped a second time.
    l = l.replace('&', '&amp;')
    l = l.replace('<', '&lt;')
    l = l.replace('>', '&gt;')
    return l


def formatAtfLineHtml(l, nolemma=True):
    """Format a single ATF line for HTML.

    Returns the HTML-escaped line.  When ``nolemma`` is true, lemma lines
    (lines starting with ``#lem:`` after leading whitespace) are dropped and
    the empty string is returned instead.  Falsy input yields the empty
    string.
    """
    if not l:
        return ""
    if nolemma and l.lstrip().startswith('#lem:'):
        # ignore lemma lines
        return ""
    return formatAtfHtml(l)


def formatAtfFullLineNum(txt, nolemma=True):
    """Prefix numbered ATF text lines with their surface and column.

    Walks through ``txt`` line by line.  A line starting with a known surface
    marker (e.g. ``@obverse``) sets the current surface, a ``@column`` line
    sets the current column, and every line starting with a digit 1-9 is
    rewritten as ``surface:column:line``.  All lines, rewritten or not, are
    kept in the output, joined with newlines.  Falsy input yields the empty
    string.

    ``nolemma`` is accepted for interface compatibility but is not used.
    """
    if not txt:
        return ""

    out = []
    surf = ""
    col = ""
    for line in txt.splitlines():
        line = unicodify(line)
        if line and line[0] == '@':
            # surface or column marker
            words = line.split(' ')
            marker = words[0]
            if marker in _ATF_SURFACES:
                surf = line.replace(marker, _ATF_SURFACES[marker]).strip()
            elif marker == '@column':
                col = ' '.join(words[1:])
        elif line and line[0] in '123456789':
            # ordinary text line -> add full line number
            line = "%s:%s:%s" % (surf, col, line)
        out.append(line)

    return '\n'.join(out)


def generateXMLReturn(hash):
    """Build the XML document used as return value for uploadATFRPC.

    ``hash`` is a mapping with the keys ``'errors'``, ``'changed'`` and
    ``'newPs'``.  Each item of ``'errors'`` and ``'changed'`` is formatted
    with two ``%s`` placeholders (attribute value and element text), each
    item of ``'newPs'`` with one.

    NOTE(review): the values are inserted verbatim, without XML escaping;
    confirm that callers only pass XML-safe text.
    """
    parts = ["<return>", "<errors>"]
    parts.extend('<error atf="%s">%s</error>' % error
                 for error in hash['errors'])
    parts.append("</errors>")
    parts.append("<changes>")
    parts.extend('<change atf="%s">%s</change>' % changed
                 for changed in hash['changed'])
    parts.append("</changes>")
    parts.append("<newPs>")
    parts.extend('<new atf="%s"/>' % new for new in hash['newPs'])
    parts.append("</newPs>")
    parts.append("</return>")
    return "".join(parts)


def unique(s):
    """Return a list of the elements in ``s`` without duplicates.

    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
    unique("abcabc") some permutation of ["a", "b", "c"], and
    unique(([1, 2], [2, 3], [1, 2])) some permutation of [[2, 3], [1, 2]].

    Three strategies are tried in turn:

    1. If all elements are hashable they are collected as dict keys.
    2. Otherwise, if the elements can be sorted, equal elements are brought
       together by sorting and duplicates are dropped in a single pass.
    3. Otherwise each element is compared against the results so far,
       which only needs equality testing but takes quadratic time.

    (adapted from the Python Cookbook)
    """
    if len(s) == 0:
        return []

    # Fastest: hash every element.  Fails quickly with TypeError when an
    # element is not hashable.
    try:
        return list(dict.fromkeys(s))
    except TypeError:
        pass  # move on to the next method

    # Second fastest: sort, then weed out adjacent duplicates.
    try:
        ordered = sorted(s)
    except TypeError:
        pass  # move on to the next method
    else:
        result = [ordered[0]]
        for item in ordered[1:]:
            if item != result[-1]:
                result.append(item)
        return result

    # Brute force is all that's left.
    result = []
    for item in s:
        if item not in result:
            result.append(item)
    return result