diff test/test2.py @ 0:834706423ac1

initial
author dwinter
date Tue, 26 Feb 2013 15:22:07 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test2.py	Tue Feb 26 15:22:07 2013 +0100
@@ -0,0 +1,23 @@
+import httplib2
+import xml.etree.ElementTree as ET
+
+h = httplib2.Http()
+resp, content = h.request("http://localhost:8983/solr/mpiwgweb/select?q=*%3A*&wt=xml&tv=on&qt=tvrh&fl=main_content&tv.tf=true")
+
+root = ET.fromstring(content)
+uri = "http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28"
+fieldname="main_content"
+tvs = root.findall(".//lst[@name='termVectors']/lst[@name='%s']/lst[@name='%s']/lst"%(uri,fieldname))
+print tvs
+for tv in tvs:
+    word = tv.attrib['name']
+    for f in  tv.findall("./int[@name='tf']"):
+        fre = f.text
+        
+    print word,fre
+
+
+
+
+
+#<lst name="termVectors"><str name="uniqueKeyFieldName">urlNorm</str><lst name="http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28"><str name="uniqueKey">http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28</str><lst name="main_content"><lst name="1960er"><int name="tf">1</int></lst><lst name="1970er"><int name="tf">2</int></lst><lst name="1986">
\ No newline at end of file