"""split atf file, get an xtf file""" import sys from xml.dom.pulldom import parse def splitxtf(filename): doc=parse(filename) while 1: node=doc.getEvent() if node is None: break; else: if node[1].nodeName=='transliteration': doc.expandNode(node[1]) fn=node[1].getAttribute("xml:id") nf=file("xtf/"+fn+".xtf",'w') nf.write(""""""+node[1].toxml()+"") print "wrote: %s"%fn def splitatf(filename): """split it""" try: fh=open(filename,"r") except: print """Can't open file: %s (%s %s)"""%(filename,sys.exc_info()[0],sys.exc_info()[0]) nf=None for line in fh.readlines(): print line if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile if nf: nf.close() #close last file filename=line[1:].split("=")[0].rstrip()+".atf" nf=file(filename,"w") nf.write(line) nf.close() fh.close() splitxtf(sys.argv[1])