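"""Split a multi-text file into one file per text.

``splitxtf`` writes one ``.xtf`` file per ``transliteration`` element of an
XML document; ``splitatf`` writes one ``.atf`` file per ``&`` header line of
a text file.
"""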
2:
3: import sys
4: from xml.dom.pulldom import parse
5:
def splitxtf(filename, outdir="xtf"):
    """Split an XML document into one file per ``transliteration`` element.

    The input is streamed with ``xml.dom.pulldom``.  Each ``transliteration``
    element is expanded into a DOM subtree, wrapped in a new ``texts`` root
    element and written, UTF-8 encoded, to ``<outdir>/<xml:id>.xtf``.  A
    ``wrote: <id>`` line is printed for every file produced.

    Args:
        filename: Path of the XML file to split.
        outdir: Directory receiving the ``.xtf`` files; it is created when
            missing.  Defaults to ``"xtf"``, the location that used to be
            hard-coded.
    """
    import os
    from xml.dom.pulldom import START_ELEMENT

    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)

    # Open the input ourselves so the handle is closed deterministically;
    # parse() given a path never closes the stream it opens.
    with open(filename, "rb") as source:
        doc = parse(source)
        while True:
            event = doc.getEvent()
            if event is None:
                break

            kind, node = event
            # Only a start tag may be expanded: expandNode() consumes events
            # until the matching end tag of the node it is given.
            if kind != START_ELEMENT or node.nodeName != 'transliteration':
                continue

            doc.expandNode(node)
            fn = node.getAttribute("xml:id")

            # Serialise to bytes explicitly so non-ASCII content does not
            # depend on the interpreter's default encoding.
            payload = (
                b'<texts xmlns="http://emegir.info/xtf" xmlns:lem="http://emegir.info/lemma" >'
                + node.toxml("utf-8")
                + b"</texts>"
            )
            with open(os.path.join(outdir, fn + ".xtf"), "wb") as nf:
                nf.write(payload)
            print("wrote: %s" % fn)
20:
def splitatf(filename, outdir="."):
    """Split a text file into one ``.atf`` file per ``&`` header line.

    A line whose first non-blank character is ``&`` starts a new output
    file.  The file is named after the text between the ``&`` and the first
    ``=`` (surrounding whitespace removed) plus ``.atf``.  The header line and
    every following line up to the next header are copied into it unchanged.
    Lines that appear before the first header belong to no output file and
    are skipped.  Every input line is echoed to standard output.

    Args:
        filename: Path of the file to split.
        outdir: Directory receiving the ``.atf`` files.  Defaults to the
            current directory, where the files were always written.

    Returns:
        None.  If the input cannot be opened a message is printed and the
        function returns without writing anything.
    """
    import os

    try:
        fh = open(filename, "r")
    except (IOError, OSError):
        # Report exception type and value, then stop: there is nothing to read.
        print("""Can't open file: %s (%s %s)""" % (filename, sys.exc_info()[0], sys.exc_info()[1]))
        return

    nf = None
    try:
        for line in fh:
            print(line)
            stripped = line.lstrip()
            if stripped.startswith("&"):  # new file
                if nf is not None:
                    nf.close()  # close last file
                # Slice the stripped line so an indented header still yields
                # the name that follows the '&'.
                name = stripped[1:].split("=")[0].strip() + ".atf"
                nf = open(os.path.join(outdir, name), "w")

            if nf is not None:
                nf.write(line)
    finally:
        # Release both handles even if reading or writing fails midway.
        if nf is not None:
            nf.close()
        fh.close()
43:
44:
# Script entry: split the XML file named by the first command-line argument.
xml_path = sys.argv[1]
splitxtf(xml_path)
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>