view test/test2.py @ 12:2e2da6d5e30d default tip

Bug in generateLink: if searchterm == "*", then _fc is not included in the link.
author dwinter
date Thu, 22 Aug 2013 14:21:25 +0200
parents 834706423ac1
children
line wrap: on
line source

import httplib2
import xml.etree.ElementTree as ET

# Fetch term vectors from the local Solr core as XML. The query string asks
# for q=*:* with the term-vector handler (qt=tvrh, tv=on), restricted to the
# main_content field and including term frequencies (tv.tf=true).
h = httplib2.Http()
resp, content = h.request("http://localhost:8983/solr/mpiwgweb/select?q=*%3A*&wt=xml&tv=on&qt=tvrh&fl=main_content&tv.tf=true")

root = ET.fromstring(content)

# Document key and field whose term vector is printed below.
uri = "http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28"
fieldname = "main_content"

# Select one <lst> element per term, nested as
# termVectors / <lst name=uri> / <lst name=fieldname> / <lst name=term>.
tvs = root.findall(".//lst[@name='termVectors']/lst[@name='%s']/lst[@name='%s']/lst" % (uri, fieldname))
print(tvs)

for tv in tvs:
    word = tv.attrib['name']
    # Look the frequency up fresh for every term. Previously `fre` was only
    # assigned inside an inner loop, so a term without an <int name="tf">
    # child either raised NameError (first term) or silently reported the
    # previous term's frequency. The last matching node wins, as before.
    tf_nodes = tv.findall("./int[@name='tf']")
    fre = tf_nodes[-1].text if tf_nodes else None
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print("%s %s" % (word, fre))





#<lst name="termVectors"><str name="uniqueKeyFieldName">urlNorm</str><lst name="http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28"><str name="uniqueKey">http://127.0.0.1:18080/www_neu/de/aktuelles/features/feature28</str><lst name="main_content"><lst name="1960er"><int name="tf">1</int></lst><lst name="1970er"><int name="tf">2</int></lst><lst name="1986">