ttools/views.py @ 0:af2f8fe486f6 (default tip)

initial
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Tue, 17 Feb 2015 12:44:40 +0100
from django.shortcuts import render, redirect
from django.views.generic.base import View
from proxy.models import Server
import urllib.parse
import urllib.request
from django.http.response import StreamingHttpResponse, HttpResponse, JsonResponse

import json
import mergedict
from lxml import etree
from django.views.decorators.http import require_http_methods
from django.template.context import RequestContext

from django.views.decorators.csrf import csrf_protect

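# TextToolAnalyse tokenizes a submitted text with the MPDL language-technology
# service (mpdl-service.mpiwg-berlin.mpg.de), looks up a lemma and a short
# dictionary abstract for every token, and returns the result as a JSON list of
# annotations (spot, label, title, uri, start/end offsets, confidence).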
class TextToolAnalyse(View):

    def getLabel(self, token, lang):
        # ask the MPDL dictionary service for the morphological entry of a token
        # and return the lemma (normalized form) it reports
        morphUrl = "http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=xml&outputType=morphCompact" % lang
        morphUrl += "&query=%s" % urllib.parse.quote(token)

        search = urllib.request.urlopen(morphUrl)

        print(search)
        dom = etree.parse(search).getroot()

        lem = dom.xpath("//lemma/name")[0].text
        return lem

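    # Illustrative call (the token value is just an example): getLabel("arma", "lat")
    # fetches the GetDictionaryEntries XML for "arma" and returns the text of the
    # first <lemma><name> element in the response.
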
    def post(self, request):
        return self.get(request, method="post")

    def get(self, request, method="get"):

        #params_full = dict(request.REQUEST.dicts[1])

        if method == "get":
            try:
                text = request.GET['text']
            except KeyError:
                return redirect("./tt/api")
            try:
                lang = request.GET['lang']
            except KeyError:
                lang = "lat"

        else:
            text = request.POST['text']

            try:
                lang = request.POST['lang']
            except KeyError:
                lang = "lat"

        url = "http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/text/Tokenize"
        morphUrl = "http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=xml&outputType=morphCompact" % lang
        dictServUrl = "http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=html&outputType=morphCompact&outputType=dictFull" % lang

        # example query string of the dictionary service:
        # query=preface&queryDisplay=Preface&language=en&outputFormat=html&outputType=morphCompact&outputType=dictFull

        params = {}

        #text = text[0:300]  # dw: for the time being

        chunks = int(len(text) / 300) + 1  # split the text into chunks of 300 characters
        ret = {}
        annotations = []

        for chunk in range(min(10, chunks)):  # at most 10 chunks for now, otherwise the request takes too long

            currentText = text[chunk * 300:min(len(text), (chunk + 1) * 300)]

            params["inputString"] = currentText
            params["language"] = lang
            params["outputFormat"] = "xml"
            #params["dictionary"] = "Yes"

            searchUrl = url + "?" + urllib.parse.urlencode(params, True)

            search = urllib.request.urlopen(searchUrl)

            print(searchUrl)

            try:
                dom = etree.parse(search)
                root = dom.getroot()

            except etree.XMLSyntaxError:
                # work around a bug in the MPIWG service: URLs in the response are
                # not properly encoded, so escape stray ampersands and re-parse
                search = urllib.request.urlopen(searchUrl)
                txt = search.read().decode("utf-8")
                txt = txt.replace("&", "&amp;")
                root = etree.fromstring(txt)

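            # Rough shape of the Tokenize XML response, inferred from the XPath
            # expressions used below (only these elements are actually relied on):
            #   <elapsed-time-ms>...</elapsed-time-ms>
            #   <token>
            #     <name>word form</name>
            #     <dictionary>
            #       <name>lsj | cooper | baretti | ...</name>
            #       <entries><entry><content>...</content></entry></entries>
            #     </dictionary>
            #   </token>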
            time = str(root.xpath("./elapsed-time-ms")[0].text)

            print(time)

            ret["time"] = ret.get("time", 0) + int(time)

            for token in root.xpath(".//token"):
                annot = {}

                annot["spot"] = token.xpath("./name")[0].text

                #annot['title'] = token.xpath(".//dictionary/entries/entry/form")[0].text
                #annot['label'] = token.xpath(".//dictionary/entries/entry/form")[0].text
                #annot['uri'] = token.xpath(".//dictionary/entries/entry/remoteUrl")[0].text
                try:
                    label = self.getLabel(annot['spot'], lang)
                    annot['label'] = label
                    annot['title'] = label

                    annot['uri'] = dictServUrl + "&query=%s" % annot['spot']
                    annot['start'] = 3   # placeholder, recalculated below
                    annot['end'] = 5     # placeholder, recalculated below
                    annot['confidence'] = 0.8
                    annot['image'] = {}
                    #annot['type'] = "Web page"

                    annot['abstract'] = ''
                    if lang == "lat":
                        for dicts in token.xpath(".//dictionary"):
                            dictName = dicts.xpath("./name")[0].text
                            if dictName == "cooper":  # use the Cooper dictionary for Latin
                                try:
                                    annot['abstract'] = dicts.xpath("./entries/entry/content/sense/trans")[0].text
                                except IndexError:
                                    annot['abstract'] = ""

                    elif lang == "ita":
                        for dicts in token.xpath(".//dictionary"):
                            dictName = dicts.xpath("./name")[0].text
                            if dictName == "baretti":  # use the Baretti dictionary for Italian
                                try:
                                    #annot['abstract'] = ""
                                    annot['abstract'] = dicts.xpath("./entries/entry/content/i")[0].text
                                except IndexError:
                                    annot['abstract'] = ""

                    elif lang == "grc":
                        for dicts in token.xpath(".//dictionary"):
                            dictName = dicts.xpath("./name")[0].text
                            if dictName == "lsj":  # use Liddell-Scott (LSJ) for Greek
                                try:
                                    #annot['abstract'] = ""
                                    annot['abstract'] = dicts.xpath("./entries/entry/content/tr")[0].text
                                except IndexError:
                                    annot['abstract'] = ""

                    annot['lod'] = {"word": "http://purl.org/linguistics/gold/OrthographicWord"}
                    annot['type'] = ["http://purl.org/linguistics/gold/OrthographicWord"]
                    annot['types'] = ["http://purl.org/linguistics/gold/OrthographicWord"]
                    annotations.append(annot)

                except Exception as err:
                    # skip tokens for which the lemma lookup or dictionary parsing fails
                    print(err)
                    pass

        # assign character offsets: locate each annotated token in the original text
        # in order, so start/end refer to positions in the full submitted text
        cn = 0
        cs = text
        wps = []
        for an in annotations:
            t = an['spot']
            ps = cs.find(t)
            wps.append((t, ps + cn))
            an['start'] = ps + cn
            an['end'] = ps + cn + len(t)

            cn = ps + cn + len(t)
            cs = cs[ps + len(t):]

        #print(wps)
        print("Length of wordlist:")
        print(len(wps))

        ret['annotations'] = annotations
        ret['lang'] = lang

        return JsonResponse(ret)

        #for token in dom.getroot().result.tokens:
        #    print(token)
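
# Example exchange (sketch only): the exact URL depends on the project's URL
# configuration; the redirect above suggests the view is reachable under ./tt/api.
# The values below are illustrative, not actual service output.
#
#   GET /tt/api?text=arma+virumque+cano&lang=lat
#
#   {"time": 123,
#    "lang": "lat",
#    "annotations": [
#      {"spot": "arma", "label": "arma", "title": "arma",
#       "uri": "http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=lat&outputFormat=html&outputType=morphCompact&outputType=dictFull&query=arma",
#       "start": 0, "end": 4, "confidence": 0.8, "image": {}, "abstract": "...",
#       "lod": {"word": "http://purl.org/linguistics/gold/OrthographicWord"},
#       "type": ["http://purl.org/linguistics/gold/OrthographicWord"],
#       "types": ["http://purl.org/linguistics/gold/OrthographicWord"]},
#      ...
#    ]}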