0
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1 from django.shortcuts import render, redirect
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
2 from django.views.generic.base import View
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
3 from proxy.models import Server
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
4 import urllib
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
5 from django.http.response import StreamingHttpResponse, HttpResponse,JsonResponse
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
6
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
7 import json
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
8 import mergedict
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
9 from cgitb import text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
10 from lxml import etree
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
11 import token
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
12 from django.template.base import Token
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
13 from django.views.decorators.http import require_http_methods
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
14 from django.template.context import RequestContext
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
15
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
16 from django.views.decorators.csrf import csrf_protect
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
17
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
18
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
19 class TextToolAnalyse(View):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
20
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
21 def getLabel(self,token,lang):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
22 morphUrl="http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=xml&outputType=morphCompact"%lang
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
23 morphUrl+="&query=%s"%urllib.request.quote(token)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
24
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
25 search = urllib.request.urlopen(morphUrl)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
26
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
27 #txt = urllib.request.urlopen(searchUrl).read().decode('utf-8')
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
28
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
29
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
30
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
31 print(search)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
32 dom = etree.parse(search).getroot()
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
33
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
34 lem = dom.xpath("//lemma/name")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
35 return lem
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
36
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
37
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
38
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
39
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
40 def post(self,request):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
41 return self.get(request,method="post")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
42
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
43 def get(self,request,method="get"):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
44
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
45 #params_full= dict(request.REQUEST.dicts[1])
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
46
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
47 if method=="get":
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
48 try:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
49 text = request.GET['text']
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
50
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
51 except:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
52 return redirect("./tt/api")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
53 try:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
54 lang = request.GET['lang']
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
55 except:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
56 lang="lat"
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
57
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
58 else:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
59 text = request.POST['text']
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
60
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
61
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
62
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
63 try:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
64 lang = request.POST['lang']
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
65 except:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
66 lang="lat"
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
67
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
68 url="http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/text/Tokenize"
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
69 morphUrl="http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=xml&outputType=morphCompact"%lang
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
70 dictServUrl="http://mpdl-service.mpiwg-berlin.mpg.de/mpiwg-mpdl-lt-web/lt/GetDictionaryEntries?language=%s&outputFormat=html&outputType=morphCompact&outputType=dictFull"%lang
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
71
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
72
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
73
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
74
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
75 # query=preface&queryDisplay=Preface&language=en&outputFormat=html&outputType=morphCompact&outputType=dictFull
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
76
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
77
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
78
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
79
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
80
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
81 params={}
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
82
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
83
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
84 #text=text[0:300]#dw for the time beeing
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
85
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
86
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
87 chunks = int(len(text)/300)+1 # zerlege den text in 300 schritte
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
88 ret = {}
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
89 annotations=[]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
90
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
91 for chunk in range(min(10,chunks)): #maximal 5 z.Z. otherwise to long
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
92
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
93 currentText=text[chunk*300:min(len(text),(chunk+1)*300-1)]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
94
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
95 params["inputString"]=currentText
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
96 params["language"]=lang
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
97 params["outputFormat"]="xml"
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
98 #params["dictionary"]="Yes"
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
99
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
100
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
101 searchUrl =url+"?"+urllib.parse.urlencode(params,True)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
102
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
103
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
104
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
105 search = urllib.request.urlopen(searchUrl)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
106
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
107 #txt = urllib.request.urlopen(searchUrl).read().decode('utf-8')
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
108
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
109
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
110
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
111 print(searchUrl)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
112
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
113 try:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
114 dom = etree.parse(search)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
115 root = dom.getroot()
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
116
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
117 except: #go around a bug in mpiwg service where ulrs are not properly encoded
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
118 search = urllib.request.urlopen(searchUrl)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
119 txt=search.read().decode("utf-8")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
120 txt=txt.replace("&","&")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
121 root =etree.fromstring(txt)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
122
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
123
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
124 time = str(root.xpath("./elapsed-time-ms")[0].text)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
125
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
126 print (time)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
127
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
128
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
129 ret["time"]=ret.get("time",0)+int(time)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
130
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
131
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
132 for token in root.xpath(".//token"):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
133 annot={}
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
134
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
135
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
136
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
137
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
138
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
139 annot["spot"]=token.xpath("./name")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
140
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
141 #annot['title'] = token.xpath(".//dictionary/entries/entry/form")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
142 #annot['label'] = token.xpath(".//dictionary/entries/entry/form")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
143 #annot['uri'] = token.xpath(".//dictionary/entries/entry/remoteUrl")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
144 try:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
145 annot['label'] = self.getLabel(annot['spot'],lang)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
146 annot['title'] = self.getLabel(annot['spot'],lang)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
147
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
148 annot['uri'] = dictServUrl+"&query=%s"%annot['spot']
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
149 annot['start']=3
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
150 annot['end']=5
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
151 annot['confidence']=0.8
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
152 annot['image']={}
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
153 #annot['type']="Web page"
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
154
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
155
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
156 annot['abstract']=''
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
157 if lang=="lat":
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
158
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
159
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
160 for dicts in token.xpath(".//dictionary"):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
161 dictName=dicts.xpath("./name")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
162 if dictName == "cooper": #choose liddle sctt
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
163 ctsStr=[]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
164
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
165 try:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
166 annot['abstract']=dicts.xpath("./entries/entry/content/sense/trans")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
167 except:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
168
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
169 annot['abstract']=""
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
170
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
171 elif lang=="ita":
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
172
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
173
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
174 for dicts in token.xpath(".//dictionary"):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
175 dictName=dicts.xpath("./name")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
176 if dictName == "baretti": #choose liddle sctt
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
177 ctsStr=[]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
178
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
179 try:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
180 #annot['abstract']=""
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
181 annot['abstract']=dicts.xpath("./entries/entry/content/i")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
182 except:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
183
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
184 annot['abstract']=""
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
185 elif lang=="grc":
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
186
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
187
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
188 for dicts in token.xpath(".//dictionary"):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
189 dictName=dicts.xpath("./name")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
190 if dictName == "lsj": #choose liddle sctt
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
191 ctsStr=[]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
192
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
193 try:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
194 #annot['abstract']=""
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
195 annot['abstract']=dicts.xpath("./entries/entry/content/tr")[0].text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
196 except:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
197
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
198 annot['abstract']=""
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
199
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
200
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
201
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
202 annot['lod']={"word":"http://purl.org/linguistics/gold/OrthographicWord"}
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
203 annot['type']=["http://purl.org/linguistics/gold/OrthographicWord"]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
204 annot['types']=["http://purl.org/linguistics/gold/OrthographicWord"]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
205 annotations.append(annot)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
206
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
207
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
208
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
209 except RuntimeError as err:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
210 print (err)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
211 pass
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
212
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
213
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
214 cn = 0
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
215 cs = text
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
216 wps=[]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
217 for an in annotations:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
218 t=an['spot']
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
219 ps = cs.find(t)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
220 wps.append((t,ps+cn))
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
221 an['start']=ps+cn
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
222 an['end']=ps+cn+len(t)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
223
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
224 cn=ps+cn+len(t)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
225 cs=cs[ps+len(t):]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
226
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
227
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
228
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
229 #print (wps)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
230 print ("Lenght wordlist:")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
231 print (len(wps))
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
232
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
233
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
234
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
235
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
236
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
237 ret['annotations']=annotations
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
238 ret['lang']=lang
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
239
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
240 return JsonResponse(ret)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
241
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
242 #for token in dom.getroot().result.tokens:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
243 # print (token)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
244
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
245
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
246
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
247
|