annotate test/test2.py @ 11:f7fe88804cb8

Merge with 896bea4f61a00f3771c92a66fe55af7826974fa7
author dwinter
date Thu, 27 Jun 2013 08:29:20 +0200
parents 834706423ac1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
834706423ac1 initial
dwinter
parents:
diff changeset
1 import httplib2
834706423ac1 initial
dwinter
parents:
diff changeset
2 import xml.etree.ElementTree as ET
834706423ac1 initial
dwinter
parents:
diff changeset
3
834706423ac1 initial
dwinter
parents:
diff changeset
4 h = httplib2.Http()
834706423ac1 initial
dwinter
parents:
diff changeset
5 resp, content = h.request("http://localhost:8983/solr/mpiwgweb/select?q=*%3A*&wt=xml&tv=on&qt=tvrh&fl=main_content&tv.tf=true")
834706423ac1 initial
dwinter
parents:
diff changeset
6
834706423ac1 initial
dwinter
parents:
diff changeset
7 root = ET.fromstring(content)
834706423ac1 initial
dwinter
parents:
diff changeset
8 uri = "http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28"
834706423ac1 initial
dwinter
parents:
diff changeset
9 fieldname="main_content"
834706423ac1 initial
dwinter
parents:
diff changeset
10 tvs = root.findall(".//lst[@name='termVectors']/lst[@name='%s']/lst[@name='%s']/lst"%(uri,fieldname))
834706423ac1 initial
dwinter
parents:
diff changeset
11 print tvs
834706423ac1 initial
dwinter
parents:
diff changeset
12 for tv in tvs:
834706423ac1 initial
dwinter
parents:
diff changeset
13 word = tv.attrib['name']
834706423ac1 initial
dwinter
parents:
diff changeset
14 for f in tv.findall("./int[@name='tf']"):
834706423ac1 initial
dwinter
parents:
diff changeset
15 fre = f.text
834706423ac1 initial
dwinter
parents:
diff changeset
16
834706423ac1 initial
dwinter
parents:
diff changeset
17 print word,fre
834706423ac1 initial
dwinter
parents:
diff changeset
18
834706423ac1 initial
dwinter
parents:
diff changeset
19
834706423ac1 initial
dwinter
parents:
diff changeset
20
834706423ac1 initial
dwinter
parents:
diff changeset
21
834706423ac1 initial
dwinter
parents:
diff changeset
22
834706423ac1 initial
dwinter
parents:
diff changeset
23 #<lst name="termVectors"><str name="uniqueKeyFieldName">urlNorm</str><lst name="http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28"><str name="uniqueKey">http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28</str><lst name="main_content"><lst name="1960er"><int name="tf">1</int></lst><lst name="1970er"><int name="tf">2</int></lst><lst name="1986">