annotate exportTextToSolr.py @ 4:ceac1f8e896f default tip

exportTexts
author dwinter
date Fri, 14 Dec 2012 13:47:41 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
ceac1f8e896f exportTexts
dwinter
parents:
diff changeset
"""Dump the XML rendering of every TEXT entity to /tmp/<id>.xml.

The list of entities is read from the OpenMind JSON interface
(``get_ents`` with ``oc=TEXT``).  For each entity the script downloads
``text.xml?id=<id>`` from the service on localhost:58180 and stores the raw
response body in ``/tmp/<id>.xml``, printing each id as it goes.

NOTE(review): this script targets Python 2 (it relies on ``urllib2``).
"""
import contextlib
import json
import urllib2

# Endpoint returning a JSON document whose 'ents' list holds the entities.
ENTS_URL = 'https://openmind-ismi-dev.mpiwg-berlin.mpg.de/om4-ismi/jsonInterface?method=get_ents&oc=TEXT'
# Prefix of the per-entity XML export; the entity id is appended.
TEXT_XML_URL = 'http://localhost:58180/ISMI/database/text.xml?id='
# Directory prefix for the exported files.
OUT_DIR = "/tmp/"

# urllib2 responses are not context managers, so closing() is used to make
# sure the connection is released even when JSON parsing fails.
with contextlib.closing(urllib2.urlopen(ENTS_URL)) as u:
    texts = json.load(u)

for ent in texts['ents']:
    # str() instead of repr(): in Python 2, repr() appends "L" to long
    # integers and wraps strings in quotes, which would corrupt both the
    # request URL and the output file name.
    # NOTE(review): ids are presumably integers -- confirm against the API.
    ent_id = str(ent['id'])

    # Printed before the download so a failing entity can be identified.
    print(ent_id)

    with contextlib.closing(urllib2.urlopen(TEXT_XML_URL + ent_id)) as x:
        # Binary mode: the response body is written out as raw bytes.
        with open(OUT_DIR + ent_id + ".xml", 'wb') as fw:
            fw.write(x.read())