annotate harvestDataFromEvents.py @ 6:a7c8d212f5f4 JS-viewer tip

exchanged e4D for new JS-viewer (GeoTemCo) + whitespace-changes (sorry!)
author Sebastian Kruse <skruse@mpiwg-berlin.mpg.de>
date Tue, 22 Jan 2013 15:12:30 +0100
parents f198c86dd1f6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dwinter
parents:
diff changeset
1 '''
dwinter
parents:
diff changeset
2 Created on 11.09.2012
dwinter
parents:
diff changeset
3
dwinter
parents:
diff changeset
4 @author: dwinter
dwinter
parents:
diff changeset
5 '''
dwinter
parents:
diff changeset
6
dwinter
parents:
diff changeset
7
dwinter
parents:
diff changeset
8 from lxml import etree
dwinter
parents:
diff changeset
9 import os
dwinter
parents:
diff changeset
10 import codecs
dwinter
parents:
diff changeset
11
dwinter
parents:
diff changeset
class Event:
    """A single geolocated event that can be serialised as a KML placemark.

    All fields default to the empty string at class level, so every
    attribute can be read even if a caller never assigns it.
    """

    long = ""         # longitude, second part of the KML coordinate pair order "long,lat"
    lat = ""          # latitude
    oid = ""          # identifier; used as the placemark <name>
    placename = ""    # written to the placemark <address>
    date = ""         # written to <TimeStamp><when>
    species = ""      # not read by __str__ or toKML
    description = ""  # markup placed inside a CDATA section by toKML

    def __str__(self):
        """Return the short form "<oid> <lat> <long>"."""
        return "%s %s %s" % (self.oid, self.lat, self.long)

    def toKML(self):
        """Return this event as a KML ``<Placemark>`` fragment.

        Text fields are XML-escaped so that characters such as ``&`` or ``<``
        in a place name cannot produce malformed KML. The description is kept
        verbatim inside a CDATA section; an embedded ``]]>`` is split across
        two CDATA sections so it cannot terminate the section early. Fields
        that are ``None`` are written as empty text.
        """
        # Local import keeps the block self-contained.
        from xml.sax.saxutils import escape

        def text(value):
            # "%s" keeps unicode values unicode and turns numbers into text.
            if value is None:
                return ""
            return escape("%s" % (value,))

        description = self.description
        if description is None:
            description = ""
        description = ("%s" % (description,)).replace("]]>", "]]]]><![CDATA[>")

        template = """
<Placemark>
    <name>%s</name>
    <address>%s</address>
    <description><![CDATA[%s]]></description>
    <TimeStamp><when>%s</when></TimeStamp>
    <Point>
        <coordinates>%s,%s</coordinates>
    </Point>
</Placemark>
"""
        return template % (
            text(self.oid),
            text(self.placename),
            description,
            text(self.date),
            text(self.long),
            text(self.lat),
        )
dwinter
parents:
diff changeset
45
dwinter
parents:
diff changeset
class EventKMLTransformer:
    """Read event XML, collect its places as Event objects and emit KML.

    Optionally every collected place is also written as an SQL INSERT
    statement to a file given at construction time.
    """

    # Stylesheet used by createDescription; resolved relative to the
    # current working directory.
    XSLT_PATH = "../../XSLT/entryToXHTML.xsl"

    def __init__(self, sqlFilename=None):
        """Create a transformer.

        sqlFilename -- if given, a UTF-8 file of that name is opened for
                       writing and receives one INSERT per collected place;
                       call close() (or use the instance in a ``with``
                       statement) to close it.
        """
        self.cnt = 0
        self._transform = None  # compiled XSLT, created on first use
        # If a filename is given, an SQL file is created.
        if sqlFilename is not None:
            self.f = codecs.open(sqlFilename, "w", "utf-8")
        else:
            self.f = None

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, traceback):
        self.close()
        return False

    def createDescription(self, element):
        """Return the XSLT rendering of *element* as a text string.

        The stylesheet is parsed and compiled once per instance instead of
        on every call.
        """
        transform = getattr(self, "_transform", None)
        if transform is None:
            transform = etree.XSLT(etree.parse(self.XSLT_PATH))
            self._transform = transform
        res = transform(element)
        try:
            textType = unicode  # Python 2
        except NameError:
            textType = str  # Python 3
        return textType(res)

    def calcDate(self, date):
        """Convert a "DD-MM-YY" string to "20YY-MM-DD".

        A year that already has four digits is kept unchanged. Anything that
        does not consist of three "-"-separated parts (including None or an
        empty string) yields the fallback "2000".
        """
        if not date:
            return "2000"

        parts = date.split("-")
        if len(parts) != 3:
            return "2000"

        day, month, year = parts
        if len(year) != 4:
            year = "20" + year
        return "%s-%s-%s" % (year, month, day)

    def _sqlQuote(self, value):
        """Escape *value* for use inside a single-quoted SQL string literal."""
        return ("%s" % (value,)).replace("'", "''")

    def writeToSQL(self, event, ident, cnt, f):
        """Write one INSERT statement for *event* to the file object *f*.

        event -- lxml element, stored serialised in column ``data``
        ident -- text identifier, stored in column ``idTxt``
        cnt   -- numeric identifier, stored in column ``id``

        Apostrophes are doubled (standard SQL quoting) rather than removed,
        so the stored XML keeps its original text.
        """
        insert = "\nINSERT INTO locations (data, \"idTxt\",id) VALUES ('%s', '%s',%s);"

        evString = etree.tostring(event)
        if isinstance(evString, bytes):
            # etree.tostring() without an encoding returns ASCII bytes with
            # character references, so this decode cannot fail.
            evString = evString.decode("ascii")
        f.write(insert % (self._sqlQuote(evString), self._sqlQuote(ident), cnt))

    def readString(self, xmlString, locationXPath, cnt):
        """Parse *xmlString* and analyse it; see analyseEventXML."""
        tree = etree.XML(xmlString)
        return self.analyseEventXML(tree, locationXPath, cnt)

    def readFile(self, filePath, locationXPath, cnt):
        """Parse the file *filePath* and analyse it; see analyseEventXML."""
        tree = etree.parse(filePath)
        return self.analyseEventXML(tree, locationXPath, cnt)

    def analyseEventXML(self, tree, locationXPath, cnt):
        """Collect one Event per place found below every ``<event>`` element.

        tree          -- parsed lxml tree or element
        locationXPath -- XPath, evaluated relative to each event, that
                         selects the place elements
        cnt           -- next numeric id for SQL rows

        Returns ``(events, cnt)`` where *cnt* has been advanced by the number
        of SQL rows written.
        """
        counter = 0
        ret = []
        for event in tree.xpath("//event"):
            text = event.get("text")
            places = event.xpath(locationXPath)
            if not places:
                continue

            # Date and description depend only on the event, so compute them
            # once instead of once per place.
            date = self.calcDate(event.get("date_filed"))
            description = self.createDescription(event)

            for place in places:
                ev = Event()
                ev.date = date
                ev.description = description
                # First build an id from the text name and the counter.
                ev.oid = "%s.%s" % (text, counter)
                counter += 1
                print(ev.oid)
                ev.placename = place.text
                ev.lat = place.get("latitude")
                ev.long = place.get("longitude")
                ret.append(ev)

                # NOTE(review): assumed to run once per place (it uses
                # ev.oid) and to advance cnt only when a row is written --
                # confirm against the repository copy of this file.
                if self.f is not None:
                    self.writeToSQL(event, ev.oid, cnt, self.f)
                    cnt += 1
        return ret, cnt

    def readFiles(self, path, locationXPath, cnt=0):
        """Analyse every regular file in directory *path*.

        Files are visited in sorted name order so the generated ids are
        reproducible; sub-directories are skipped. *path* may be given with
        or without a trailing separator.

        Returns ``(events, cnt)`` like analyseEventXML.
        """
        ret = []
        for name in sorted(os.listdir(path)):
            filePath = os.path.join(path, name)
            if not os.path.isfile(filePath):
                continue
            events, cnt = self.readFile(filePath, locationXPath, cnt)
            ret += events
        return ret, cnt

    def toKML(self, events):
        """Return a KML document string containing all *events*."""
        parts = ["<kml>\n", "<Document>"]
        parts.extend(event.toKML() for event in events)
        parts.append("</Document>")
        parts.append("</kml>")
        return "".join(parts)

    def close(self):
        """Close the SQL output file, if one was opened."""
        if self.f is not None:
            self.f.close()
dwinter
parents:
diff changeset
159
dwinter
parents:
diff changeset
160
dwinter
parents:
diff changeset
if __name__ == '__main__':
    # Directory holding the event XML files.
    eventsDir = "/Users/dwinter/Documents/Projekte/mmpa-permit-etienne/events/"

    # (XPath selecting the places of an event, KML output file)
    jobs = [
        (".//research_location/place_information", "/tmp/outResearch.xml"),
        (".//applicant_locations/place_information", "/tmp/outApplicants.xml"),
    ]

    tf = EventKMLTransformer("/tmp/out.sql")
    try:
        # cnt is threaded through both runs so SQL ids keep increasing.
        cnt = 0
        for locationXPath, outPath in jobs:
            x, cnt = tf.readFiles(eventsDir, locationXPath, cnt)
            evs = tf.toKML(x)

            out = codecs.open(outPath, "w", "utf-8")
            try:
                out.write(evs)
            finally:
                out.close()
    finally:
        # Close the SQL file even if reading or writing failed.
        tf.close()