0
|
1 import httplib2
|
|
2 import xml.etree.ElementTree as ET
|
|
3
|
|
# Solr term-vector request: all documents, XML output, term frequencies
# enabled for the "main_content" field.
SOLR_SELECT_URL = "http://localhost:8983/solr/mpiwgweb/select?q=*%3A*&wt=xml&tv=on&qt=tvrh&fl=main_content&tv.tf=true"
# Unique key of the document whose term vector is printed.
DOCUMENT_URI = "http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28"
# Field of that document whose terms are listed.
FIELD_NAME = "main_content"


def _named_children(parent, name):
    """Return the direct <lst> children of *parent* whose name attribute is *name*.

    The comparison is done in Python rather than in an XPath predicate, so
    values containing quote characters cannot break the query.
    """
    return [child for child in parent.findall("lst") if child.get("name") == name]


def extract_term_frequencies(content, uri, fieldname):
    """Extract (term, frequency) pairs from a Solr term-vector XML response.

    Args:
        content: The XML response body (bytes or text).
        uri: Value of the ``name`` attribute of the document's <lst> element
            inside ``<lst name="termVectors">``.
        fieldname: Value of the ``name`` attribute of the field's <lst>
            element inside the document element.

    Returns:
        A list of ``(term, frequency)`` tuples in document order. ``term`` is
        the ``name`` attribute of each term element; ``frequency`` is the text
        of its ``<int name="tf">`` child, or ``None`` when the term has no
        such child. The frequency is returned as text, not converted.

    Raises:
        xml.etree.ElementTree.ParseError: If *content* is not well-formed XML.
    """
    root = ET.fromstring(content)
    pairs = []
    for vectors in root.findall(".//lst[@name='termVectors']"):
        for doc in _named_children(vectors, uri):
            for field in _named_children(doc, fieldname):
                for term in field.findall("lst"):
                    tf_nodes = term.findall("int[@name='tf']")
                    # Reset per term so a missing <int name="tf"> never
                    # reuses the previous term's value; when several are
                    # present the last one wins.
                    frequency = tf_nodes[-1].text if tf_nodes else None
                    pairs.append((term.get("name"), frequency))
    return pairs


def main():
    """Fetch the term vectors from Solr and print one "term frequency" line per term.

    Raises:
        RuntimeError: If Solr answers with a non-200 HTTP status.
    """
    client = httplib2.Http()
    resp, content = client.request(SOLR_SELECT_URL)
    if resp.status != 200:
        # Fail loudly instead of feeding an error page to the XML parser.
        raise RuntimeError("Solr request failed with HTTP status %s" % resp.status)

    for word, frequency in extract_term_frequencies(content, DOCUMENT_URI, FIELD_NAME):
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print("%s %s" % (word, frequency))


if __name__ == "__main__":
    main()
|
|
18
|
|
19
|
|
20
|
|
21
|
|
22
|
|
23 #<lst name="termVectors"><str name="uniqueKeyFieldName">urlNorm</str><lst name="http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28"><str name="uniqueKey">http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28</str><lst name="main_content"><lst name="1960er"><int name="tf">1</int></lst><lst name="1970er"><int name="tf">2</int></lst><lst name="1986"> |