# File: [Repository] / cdli / cdli_helpers.py
# Revision 1.1 (CVS, branch MAIN, tag HEAD)
# Committed Fri May 15 13:13:31 2009 UTC by dwinter
# Commit message: "arbeit an admin funktionen" (work on admin functions)

def makelist(mySet):
    """Return the elements of mySet as a new list in ascending order."""
    return sorted(mySet)
    
def unicodify(s):
    """Decode a byte string (UTF-8 or Latin-1) into a unicode object.

    Falsy input (None, empty string, ...) yields an empty unicode string.
    Byte strings are decoded as UTF-8 first; if that fails they are decoded
    as Latin-1, which accepts every byte sequence.  Any other object (in
    particular an already-decoded unicode string) is returned unchanged.
    """
    if not s:
        return u""
    # `bytes` is an alias of `str` on Python 2.6+, so this check behaves as
    # before there and additionally works on Python 3.
    if isinstance(s, bytes):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # not valid UTF-8 -> fall back to Latin-1
            return s.decode('latin-1')
    return s

def utf8ify(s):
    """Encode a unicode object or string into a UTF-8 byte string.

    Falsy input (None, empty string, ...) yields an empty byte string.
    Byte strings are assumed to already be UTF-8 and are returned unchanged;
    anything else is encoded with its ``encode('utf-8')`` method.
    """
    if not s:
        return b""
    # `bytes` is an alias of `str` on Python 2.6+, so this check behaves as
    # before there and additionally works on Python 3.
    if isinstance(s, bytes):
        return s
    return s.encode('utf-8')

def formatAtfHtml(l):
    """Escape the HTML special characters &, < and > in an ATF string.

    Falsy input (None, empty string) yields an empty string.
    """
    if not l:
        return ""

    # The ampersand must be escaped first, otherwise the '&' of the
    # entities produced below would be escaped a second time.
    l = l.replace('&', '&amp;')
    # replace angular brackets
    l = l.replace('<', '&lt;')
    l = l.replace('>', '&gt;')
    return l

def formatAtfLineHtml(l, nolemma=True):
    """Return a single ATF line escaped for HTML output.

    Falsy input yields an empty string.  When nolemma is true, a line whose
    first non-blank characters are '#lem:' is suppressed (an empty string is
    returned); every other line is passed through formatAtfHtml.
    """
    if not l:
        return ""

    # lemma lines are dropped only on request
    suppress = nolemma and l.lstrip().startswith('#lem:')
    return "" if suppress else formatAtfHtml(l)



def formatAtfFullLineNum(txt, nolemma=True):
    """Prefix the numbered lines of an ATF text with surface and column.

    The text is processed line by line (each line is passed through
    unicodify first):

    * a line starting with '@' whose first word is a known surface code
      sets the current surface to that line with the code replaced by its
      short form;
    * a line starting with '@column' sets the current column to the rest
      of the line;
    * a line starting with one of the digits 1-9 is rewritten as
      "surface:column:line".

    All lines, rewritten or not, are kept and joined with newlines.
    Falsy input yields an empty string.  The nolemma argument is accepted
    but not used by this function.
    """
    if not txt:
        return ""

    # surface codes and their short forms
    surface_labels = {'@obverse':'obv',
                      '@reverse':'rev',
                      '@surface':'surface',
                      '@edge':'edge',
                      '@left':'left',
                      '@right':'right',
                      '@top':'top',
                      '@bottom':'bottom',
                      '@face':'face',
                      '@seal':'seal'}

    current_surface = ""
    current_column = ""
    out_lines = []
    for raw in txt.splitlines():
        text = unicodify(raw)
        first = text[:1]
        if first == '@':
            # surface or column marker
            tokens = text.split(' ')
            tag = tokens[0]
            if tag in surface_labels:
                current_surface = text.replace(tag, surface_labels[tag]).strip()
            elif tag == '@column':
                current_column = ' '.join(tokens[1:])
        elif first and first in '123456789':
            # ordinary text line -> prepend the full line number
            text = "%s:%s:%s" % (current_surface, current_column, text)

        out_lines.append(text)

    return '\n'.join(out_lines)
            
            
def generateXMLReturn(hash):
    """Build the XML string used as return value for uploadATFRPC.

    hash is a mapping with the keys 'errors', 'changed' and 'newPs'.
    Every entry of 'errors' and 'changed' is formatted into a template
    with two placeholders (the atf attribute and the element text), every
    entry of 'newPs' into a template with one placeholder (the atf
    attribute).  Values are inserted as they are, without XML escaping.
    """
    pieces = ["<return>"]

    pieces.append("<errors>")
    pieces.extend("""<error atf="%s">%s</error>""" % entry
                  for entry in hash['errors'])
    pieces.append("</errors>")

    pieces.append("<changes>")
    pieces.extend("""<change atf="%s">%s</change>""" % entry
                  for entry in hash['changed'])
    pieces.append("</changes>")

    pieces.append("<newPs>")
    pieces.extend("""<new atf="%s"/>""" % entry
                  for entry in hash['newPs'])
    pieces.append("</newPs>")

    pieces.append("</return>")
    return "".join(pieces)
    
    
def unique(s):
    """Return a list of the elements in s, but without duplicates.

    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
    unique("abcabc") some permutation of ["a", "b", "c"], and
    unique(([1, 2], [2, 3], [1, 2])) some permutation of
    [[2, 3], [1, 2]].

    For best speed, all sequence elements should be hashable.  Then
    unique() will usually work in linear time.

    If not possible, the sequence elements should enjoy a total
    ordering, and if list(s).sort() doesn't raise TypeError it's
    assumed that they do enjoy a total ordering.  Then unique() will
    usually work in O(N*log2(N)) time.

    If that's not possible either, the sequence elements must support
    equality-testing.  Then unique() will usually work in quadratic
    time.
    (from the python cookbook)

    The result is always a real list, regardless of which of the three
    strategies was used.
    """
    if len(s) == 0:
        return []

    # Try using a dict first, as that's the fastest and will usually
    # work.  If it doesn't work, it will usually fail quickly, so it
    # usually doesn't cost much to *try* it.  It requires that all the
    # sequence elements be hashable, and support equality comparison.
    seen = {}
    try:
        for x in s:
            seen[x] = 1
    except TypeError:
        pass  # unhashable element -> move on to the next method
    else:
        # list(...) rather than .keys(): on Python 3 keys() is only a
        # view, but callers are promised a list.
        return list(seen)

    # We can't hash all the elements.  Second fastest is to sort,
    # which brings the equal elements together; then duplicates are
    # easy to weed out in a single pass.
    # NOTE:  Python's list.sort() was designed to be efficient in the
    # presence of many duplicate elements.  This isn't true of all
    # sort functions in all languages or libraries, so this approach
    # is more effective in Python than it may be elsewhere.
    try:
        ordered = sorted(s)
    except TypeError:
        pass  # elements are not orderable -> move on to the next method
    else:
        deduped = [ordered[0]]
        for item in ordered[1:]:
            if item != deduped[-1]:
                deduped.append(item)
        return deduped

    # Brute force is all that's left.
    result = []
    for x in s:
        if x not in result:
            result.append(x)
    return result


 

# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>