"""Some helpful functions.

Contents:

* getISO                 -- read the ISO 639-1 table from a tab-separated file
* readArchimedesXML      -- look up a folder in the Archimedes corpus manifest
* getText                -- concatenate the text nodes of a DOM node list
* changeNodesInIndexMeta -- insert or replace elements in an index.meta file

The code is written to run on Python 2.6+ as well as Python 3.
"""

import os.path
import re
import urllib
import xml.dom.minidom

try:
    from urllib import urlopen as _urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen as _urlopen  # Python 3

# Default location of the ISO 639-1 table; callers may pass another path
# to getISO() instead of relying on this installation-specific default.
_ISO_639_1_FILE = '/usr/local/mpiwg/Zope/lib/python/Products/OSA_system/iso639-1.inc'

_ARCHIMEDES_MANIFEST_URL = "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest"

# Document used when no index.meta exists yet, so that there is a
# <resource> root element to attach the new nodes to.
_EMPTY_INDEX_META = b"<resource></resource>"

# Permissions given to index.meta after writing (rw-rw-r--).
_INDEX_META_MODE = 0o664


def _toAscii(raw):
    """Return *raw* as a native ``str`` with non-ASCII characters replaced by '?'."""
    if isinstance(raw, bytes):
        raw = raw.decode('ascii', 'replace')
    return str(raw.encode('ascii', 'replace').decode('ascii'))


def _toText(value):
    """Return *value* as text, decoding byte strings as UTF-8.

    Values that are already text are passed through unchanged; calling
    ``.decode`` on them would fail for non-ASCII content.
    """
    if isinstance(value, bytes):
        return value.decode("utf8")
    return value


def _elementName(name):
    """Return *name* with every space replaced by a hyphen."""
    return re.sub(r' ', '-', name)


def _removeChildElement(parent, tagName):
    """Detach and unlink the first direct child element of *parent* named *tagName*.

    Only direct children are considered because ``removeChild`` can only
    remove direct children. Does nothing when there is no such child.
    """
    for child in parent.childNodes:
        if child.nodeType == child.ELEMENT_NODE and child.tagName == tagName:
            parent.removeChild(child).unlink()
            return


def _createTextElement(dom, name, value):
    """Create an element named after *name* (spaces -> hyphens) containing *value* as text."""
    element = dom.createElement(_elementName(name))
    element.appendChild(dom.createTextNode(_toText(value)))
    return element


def getISO(path=_ISO_639_1_FILE):
    """Read the ISO 639-1 table from a tab-separated file.

    For every line the first column becomes the value and the second
    column becomes the key of the returned dictionary. Non-ASCII
    characters are replaced by '?'. Lines with fewer than two columns
    (for example blank lines) are skipped.

    Args:
        path: location of the table; defaults to the installation path
            that used to be hard-coded.

    Returns:
        A dict mapping second column -> first column, or an empty dict
        when the file cannot be read.
    """
    try:
        with open(path, 'rb') as isoFile:
            rawLines = isoFile.readlines()
    except (IOError, OSError):
        # Best effort: a missing or unreadable table yields no entries.
        return {}

    ret = {}
    for rawLine in rawLines:
        # Strip the line terminator so it cannot end up inside a key.
        columns = _toAscii(rawLine).rstrip('\r\n').split('\t')
        if len(columns) < 2:
            continue
        ret[columns[1]] = columns[0]
    return ret


def readArchimedesXML(folder):
    """Return the ``xml`` attribute of the manifest item whose ``dir`` is *folder*.

    The manifest is fetched from the Archimedes server on every call.
    Items lacking a ``dir`` or ``xml`` attribute are ignored; if several
    items share the same ``dir`` the last one wins.

    Returns:
        The ``xml`` attribute value, or "" when *folder* is not listed or
        the manifest cannot be fetched or parsed.
    """
    try:
        connection = _urlopen(_ARCHIMEDES_MANIFEST_URL)
        try:
            manifest = connection.read()
        finally:
            connection.close()
        dom = xml.dom.minidom.parseString(manifest)
    except Exception:
        # Deliberate best effort: any network or parse failure means
        # "no information available".
        return ""

    xmlByDir = {}
    for item in dom.getElementsByTagName('item'):
        try:
            xmlByDir[item.attributes['dir'].value] = item.attributes['xml'].value
        except KeyError:
            # Item without a 'dir' or 'xml' attribute.
            continue
    return xmlByDir.get(folder, "")


def getText(nodelist):
    """Return the concatenated data of all text nodes in *nodelist*.

    Nodes of any other type are skipped; child nodes are not descended into.
    """
    return "".join(node.data for node in nodelist
                   if node.nodeType == node.TEXT_NODE)


def changeNodesInIndexMeta(path, node, subnodes, parent=None, nodeAttributes=None):
    """Insert or replace a node with subnodes in the index.meta file in *path*.

    If ``path + "/index.meta"`` does not exist, an empty ``<resource>``
    document is used as the starting point and the file is created.

    Args:
        path: directory containing (or receiving) index.meta.
        node: name of the element to (re)create below the target element.
            If it is "", the subnodes are written directly into the
            target element instead, each replacing an existing direct
            child of the same name.
        subnodes: dict mapping element names to their text content.
            Spaces in names are replaced by hyphens; byte-string values
            are decoded as UTF-8.
        parent: optional name of an element below ``<resource>`` to use
            as target element; it is created if missing. Without it the
            ``<resource>`` element itself is the target.
        nodeAttributes: optional dict of attributes set on the new
            *node* element (ignored when *node* is "").

    Raises:
        IndexError: if an existing index.meta has no ``<resource>`` element.
        xml.parsers.expat.ExpatError: if an existing index.meta is not
            well-formed XML.

    The file is written UTF-8 encoded and its mode is set to 0664.
    """
    metaPath = path + "/index.meta"

    if os.path.exists(metaPath):
        with open(metaPath, 'rb') as inFile:
            content = inFile.read()
    else:
        content = _EMPTY_INDEX_META

    dom = xml.dom.minidom.parseString(content)
    root = dom.getElementsByTagName("resource")[0]

    if parent:
        parents = root.getElementsByTagName(parent)
        if parents:
            root = parents[0]
        else:
            newParent = dom.createElement(parent)
            root.appendChild(newParent)
            root = newParent

    if node == "":
        for name, value in subnodes.items():
            # Look up the old element under the same (hyphenated) name it
            # was created with, otherwise duplicates would pile up.
            _removeChildElement(root, _elementName(name))
            root.appendChild(_createTextElement(dom, name, value))
    else:
        _removeChildElement(root, node)
        nodeNew = dom.createElement(node)
        for attribute, value in (nodeAttributes or {}).items():
            nodeNew.setAttribute(attribute, value)
        for name, value in subnodes.items():
            nodeNew.appendChild(_createTextElement(dom, name, value))
        root.appendChild(nodeNew)

    with open(metaPath, "wb") as outFile:
        outFile.write(dom.toxml().encode('utf-8'))
    os.chmod(metaPath, _INDEX_META_MODE)