annotate ttools/views.py @ 0:af2f8fe486f6 default tip

initial
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Tue, 17 Feb 2015 12:44:40 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
1 from django.shortcuts import render, redirect
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
2 from django.views.generic.base import View
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
3 from proxy.models import Server
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
4 import urllib
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
5 from django.http.response import StreamingHttpResponse, HttpResponse,JsonResponse
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
6
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
7 import json
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
8 import mergedict
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
9 from cgitb import text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
10 from lxml import etree
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
11 import token
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
12 from django.template.base import Token
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
13 from django.views.decorators.http import require_http_methods
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
14 from django.template.context import RequestContext
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
15
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
16 from django.views.decorators.csrf import csrf_protect
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
17
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
18
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
19 class TextToolAnalyse(View):
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
20
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
21 def getLabel(self,token,lang):
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
22 morphUrl="http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=xml&outputType=morphCompact"%lang
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
23 morphUrl+="&query=%s"%urllib.request.quote(token)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
24
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
25 search = urllib.request.urlopen(morphUrl)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
26
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
27 #txt = urllib.request.urlopen(searchUrl).read().decode('utf-8')
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
28
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
29
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
30
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
31 print(search)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
32 dom = etree.parse(search).getroot()
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
33
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
34 lem = dom.xpath("//lemma/name")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
35 return lem
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
36
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
37
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
38
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
39
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
40 def post(self,request):
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
41 return self.get(request,method="post")
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
42
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
43 def get(self,request,method="get"):
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
44
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
45 #params_full= dict(request.REQUEST.dicts[1])
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
46
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
47 if method=="get":
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
48 try:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
49 text = request.GET['text']
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
50
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
51 except:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
52 return redirect("./tt/api")
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
53 try:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
54 lang = request.GET['lang']
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
55 except:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
56 lang="lat"
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
57
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
58 else:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
59 text = request.POST['text']
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
60
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
61
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
62
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
63 try:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
64 lang = request.POST['lang']
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
65 except:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
66 lang="lat"
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
67
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
68 url="http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/text/Tokenize"
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
69 morphUrl="http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=xml&outputType=morphCompact"%lang
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
70 dictServUrl="http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=html&outputType=morphCompact&outputType=dictFull"%lang
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
71
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
72
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
73
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
74
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
75 # query=preface&queryDisplay=Preface&language=en&outputFormat=html&outputType=morphCompact&outputType=dictFull
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
76
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
77
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
78
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
79
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
80
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
81 params={}
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
82
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
83
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
84 #text=text[0:300]#dw for the time beeing
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
85
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
86
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
87 chunks = int(len(text)/300)+1 # zerlege den text in 300 schritte
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
88 ret = {}
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
89 annotations=[]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
90
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
91 for chunk in range(min(10,chunks)): #maximal 5 z.Z. otherwise to long
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
92
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
93 currentText=text[chunk*300:min(len(text),(chunk+1)*300-1)]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
94
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
95 params["inputString"]=currentText
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
96 params["language"]=lang
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
97 params["outputFormat"]="xml"
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
98 #params["dictionary"]="Yes"
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
99
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
100
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
101 searchUrl =url+"?"+urllib.parse.urlencode(params,True)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
102
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
103
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
104
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
105 search = urllib.request.urlopen(searchUrl)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
106
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
107 #txt = urllib.request.urlopen(searchUrl).read().decode('utf-8')
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
108
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
109
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
110
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
111 print(searchUrl)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
112
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
113 try:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
114 dom = etree.parse(search)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
115 root = dom.getroot()
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
116
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
117 except: #go around a bug in mpiwg service where ulrs are not properly encoded
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
118 search = urllib.request.urlopen(searchUrl)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
119 txt=search.read().decode("utf-8")
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
120 txt=txt.replace("&","&amp;")
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
121 root =etree.fromstring(txt)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
122
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
123
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
124 time = str(root.xpath("./elapsed-time-ms")[0].text)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
125
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
126 print (time)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
127
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
128
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
129 ret["time"]=ret.get("time",0)+int(time)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
130
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
131
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
132 for token in root.xpath(".//token"):
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
133 annot={}
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
134
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
135
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
136
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
137
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
138
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
139 annot["spot"]=token.xpath("./name")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
140
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
141 #annot['title'] = token.xpath(".//dictionary/entries/entry/form")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
142 #annot['label'] = token.xpath(".//dictionary/entries/entry/form")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
143 #annot['uri'] = token.xpath(".//dictionary/entries/entry/remoteUrl")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
144 try:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
145 annot['label'] = self.getLabel(annot['spot'],lang)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
146 annot['title'] = self.getLabel(annot['spot'],lang)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
147
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
148 annot['uri'] = dictServUrl+"&query=%s"%annot['spot']
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
149 annot['start']=3
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
150 annot['end']=5
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
151 annot['confidence']=0.8
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
152 annot['image']={}
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
153 #annot['type']="Web page"
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
154
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
155
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
156 annot['abstract']=''
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
157 if lang=="lat":
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
158
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
159
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
160 for dicts in token.xpath(".//dictionary"):
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
161 dictName=dicts.xpath("./name")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
162 if dictName == "cooper": #choose liddle sctt
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
163 ctsStr=[]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
164
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
165 try:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
166 annot['abstract']=dicts.xpath("./entries/entry/content/sense/trans")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
167 except:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
168
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
169 annot['abstract']=""
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
170
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
171 elif lang=="ita":
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
172
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
173
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
174 for dicts in token.xpath(".//dictionary"):
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
175 dictName=dicts.xpath("./name")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
176 if dictName == "baretti": #choose liddle sctt
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
177 ctsStr=[]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
178
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
179 try:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
180 #annot['abstract']=""
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
181 annot['abstract']=dicts.xpath("./entries/entry/content/i")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
182 except:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
183
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
184 annot['abstract']=""
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
185 elif lang=="grc":
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
186
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
187
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
188 for dicts in token.xpath(".//dictionary"):
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
189 dictName=dicts.xpath("./name")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
190 if dictName == "lsj": #choose liddle sctt
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
191 ctsStr=[]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
192
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
193 try:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
194 #annot['abstract']=""
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
195 annot['abstract']=dicts.xpath("./entries/entry/content/tr")[0].text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
196 except:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
197
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
198 annot['abstract']=""
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
199
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
200
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
201
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
202 annot['lod']={"word":"http://purl.org/linguistics/gold/OrthographicWord"}
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
203 annot['type']=["http://purl.org/linguistics/gold/OrthographicWord"]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
204 annot['types']=["http://purl.org/linguistics/gold/OrthographicWord"]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
205 annotations.append(annot)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
206
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
207
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
208
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
209 except RuntimeError as err:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
210 print (err)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
211 pass
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
212
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
213
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
214 cn = 0
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
215 cs = text
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
216 wps=[]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
217 for an in annotations:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
218 t=an['spot']
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
219 ps = cs.find(t)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
220 wps.append((t,ps+cn))
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
221 an['start']=ps+cn
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
222 an['end']=ps+cn+len(t)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
223
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
224 cn=ps+cn+len(t)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
225 cs=cs[ps+len(t):]
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
226
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
227
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
228
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
229 #print (wps)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
230 print ("Lenght wordlist:")
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
231 print (len(wps))
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
232
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
233
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
234
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
235
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
236
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
237 ret['annotations']=annotations
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
238 ret['lang']=lang
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
239
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
240 return JsonResponse(ret)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
241
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
242 #for token in dom.getroot().result.tokens:
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
243 # print (token)
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
244
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
245
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
246
af2f8fe486f6 initial
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff changeset
247