annotate harvestDataFromEvents.py @ 1:283badd62593

new version for embedded e4D
author dwinter
date Fri, 12 Oct 2012 08:36:57 +0200
parents 26c06d568e1d
children f198c86dd1f6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dwinter
parents:
diff changeset
1 '''
dwinter
parents:
diff changeset
2 Created on 11.09.2012
dwinter
parents:
diff changeset
3
dwinter
parents:
diff changeset
4 @author: dwinter
dwinter
parents:
diff changeset
5 '''
dwinter
parents:
diff changeset
6
dwinter
parents:
diff changeset
7
dwinter
parents:
diff changeset
8 from lxml import etree
dwinter
parents:
diff changeset
9 import os
dwinter
parents:
diff changeset
10 import codecs
dwinter
parents:
diff changeset
11
dwinter
parents:
diff changeset
class Event:
    """One geo-located event that can be rendered as a KML ``<Placemark>``.

    The class-level empty strings act as defaults; instances overwrite the
    fields they know about.
    """

    long = ""         # longitude, written first in the KML coordinate pair
    lat = ""          # latitude, written second in the KML coordinate pair
    oid = ""          # identifier, used as the first part of the placemark name
    placename = ""    # used as the second part of the placemark name
    date = ""         # written into <TimeStamp><when>
    species = ""      # not used by __str__ or toKML
    description = ""  # markup placed in a CDATA section

    def __str__(self):
        """Return ``"<oid> <lat> <long>"``."""
        return "%s %s %s" % (self.oid, self.lat, self.long)

    def toKML(self):
        """Return this event as a KML ``<Placemark>`` fragment.

        Name, timestamp and coordinates are XML-escaped so that characters
        such as ``&`` or ``<`` cannot break the document.  The description is
        kept verbatim inside a CDATA section; a literal ``]]>`` in it is split
        over two CDATA sections because it would otherwise end the section
        prematurely.
        """
        from xml.sax.saxutils import escape

        # "%s" % value keeps the formatting of the previous implementation
        # for non-string values (e.g. None is rendered as "None").
        name = escape("%s - %s" % (self.oid, self.placename))
        when = escape("%s" % (self.date,))
        coordinates = escape("%s,%s" % (self.long, self.lat))
        description = ("%s" % (self.description,)).replace(
            "]]>", "]]]]><![CDATA[>")

        lines = [
            "",
            "<Placemark>",
            "  <name>%s</name>" % name,
            "  <description><![CDATA[%s]]></description>" % description,
            "  <TimeStamp><when>%s</when></TimeStamp>" % when,
            "  <Point>",
            "    <coordinates>%s</coordinates>" % coordinates,
            "  </Point>",
            "</Placemark>",
            "",
        ]
        return "\n".join(lines)
dwinter
parents:
diff changeset
44
dwinter
parents:
diff changeset
class EventKMLTransformer:
    """Read ``<event>`` elements from XML and turn their places into Events.

    For every place found below an event one ``Event`` object is created.
    Optionally each of them is also written as an ``INSERT`` statement to an
    SQL file.  The collected events can be serialised with ``toKML``.
    """

    # Stylesheet that renders an <event> element for the KML description.
    # The path is relative to the current working directory.
    XSLT_PATH = "../../XSLT/entryToXHTML.xsl"

    def __init__(self, sqlFilename=None):
        """Create the transformer.

        sqlFilename -- if given, this file is opened (UTF-8, truncating) and
                       receives one INSERT statement per place; call
                       ``close()`` when done.  If None, no SQL is written.
        """
        self.cnt = 0
        # if a filename is given, an SQL file is created
        if sqlFilename is not None:
            self.f = codecs.open(sqlFilename, "w", "utf-8")
        else:
            self.f = None
        # compiled XSLT, created on first use by createDescription
        self._transform = None

    def createDescription(self, element):
        """Return ``element`` transformed by the XSLT_PATH stylesheet as text.

        The stylesheet is parsed and compiled only once per transformer and
        then reused, instead of being re-read for every call.
        """
        transform = getattr(self, "_transform", None)
        if transform is None:
            transform = etree.XSLT(etree.parse(self.XSLT_PATH))
            self._transform = transform
        result = transform(element)
        try:
            text_type = unicode  # Python 2
        except NameError:
            text_type = str  # Python 3
        return text_type(result)

    def calcDate(self, date):
        """Convert a ``dd-mm-yy`` string into ``yyyy-mm-dd``.

        The year part is prefixed with "20" unless it already has four
        digits.  Anything that does not consist of exactly three
        "-"-separated parts (including None and "") yields "2000".
        """
        if not date:
            # attribute missing or empty: same fallback as unparsable input
            return "2000"

        parts = date.split("-")
        if len(parts) != 3:
            return "2000"

        day, month, year = parts
        if len(year) != 4:
            year = "20" + year
        return "%s-%s-%s" % (year, month, day)

    def writeToSQL(self, event, ident, cnt, f):
        """Write one INSERT statement for ``event`` to the file object ``f``.

        event -- lxml element, stored serialised in column ``data``
        ident -- text id, stored in column ``idTxt``
        cnt   -- value for the ``id`` column (written unquoted)

        Single quotes inside the two string values are doubled (standard SQL
        escaping) rather than removed, so the stored text is not altered.
        """
        insert = "\nINSERT INTO locations (data, \"idTxt\",id) VALUES ('%s', '%s',%s);"

        # tostring() returns ASCII bytes by default (non-ASCII characters are
        # emitted as character references), so decoding is always safe.
        serialized = etree.tostring(event)
        if isinstance(serialized, bytes):
            serialized = serialized.decode("ascii")

        data = serialized.replace("'", "''")
        identText = ("%s" % (ident,)).replace("'", "''")
        f.write(insert % (data, identText, cnt))

    def readString(self, xmlString, locationXPath, cnt):
        """Parse ``xmlString`` and analyse it; see ``analyseEventXML``."""
        tree = etree.XML(xmlString)
        return self.analyseEventXML(tree, locationXPath, cnt)

    def readFile(self, filePath, locationXPath, cnt):
        """Parse the file ``filePath`` and analyse it; see ``analyseEventXML``."""
        tree = etree.parse(filePath)
        return self.analyseEventXML(tree, locationXPath, cnt)

    def analyseEventXML(self, tree, locationXPath, cnt):
        """Create one Event per place of every ``//event`` in ``tree``.

        tree          -- parsed lxml tree or element
        locationXPath -- XPath, evaluated relative to each event, selecting
                         the place elements (they provide the text and the
                         ``latitude`` / ``longitude`` attributes)
        cnt           -- running number used as SQL id

        Returns the tuple ``(events, cnt)`` with the advanced counter.
        The event id is ``"<text attribute>.<n>"`` where n counts the places
        handled by this call.
        """
        counter = 0
        ret = []

        for event in tree.xpath("//event"):
            text = event.get("text")

            places = event.xpath(locationXPath)
            if not places:
                continue

            # Both values depend only on the event, so compute them once
            # instead of once per place.
            date = self.calcDate(event.get("date_filed"))
            description = self.createDescription(event)

            for place in places:
                ev = Event()
                ev.date = date
                ev.description = description
                # build the id from the text name and the counter
                ev.oid = "%s.%s" % (text, counter)
                counter += 1
                print(ev.oid)
                ev.placename = place.text

                ev.lat = place.get("latitude")
                ev.long = place.get("longitude")

                ret.append(ev)

                if self.f is not None:
                    self.writeToSQL(event, ev.oid, cnt, self.f)
                # NOTE(review): the original indentation is not recoverable
                # here; cnt is assumed to advance once per place whether or
                # not SQL is written -- confirm against the repository.
                cnt += 1

        return ret, cnt

    def readFiles(self, path, locationXPath, cnt=0):
        """Analyse every entry of directory ``path`` with ``readFile``.

        Entries are processed in sorted order so that the assigned ids are
        reproducible.  ``path`` may be given with or without a trailing
        separator.  Returns ``(events, cnt)``.
        """
        ret = []

        for name in sorted(os.listdir(path)):
            events, cnt = self.readFile(
                os.path.join(path, name), locationXPath, cnt)
            ret.extend(events)

        return ret, cnt

    def toKML(self, events):
        """Return a KML document containing ``toKML()`` of every event."""
        parts = ["<kml>\n", "<Document>"]
        parts.extend(event.toKML() for event in events)
        parts.append("</Document>")
        parts.append("</kml>")
        return "".join(parts)

    def close(self):
        """Close the SQL output file, if one was opened."""
        if self.f is not None:
            self.f.close()
dwinter
parents:
diff changeset
158
dwinter
parents:
diff changeset
159
dwinter
parents:
diff changeset
if __name__ == '__main__':
    # Directory containing the event XML files.
    eventsDir = "/Users/dwinter/Documents/Projekte/mmpa-permit-etienne/events/"

    # (XPath selecting the places of an event, KML file to write)
    jobs = [
        (".//research_location/place_information", "/tmp/outResearch.xml"),
        (".//applicant_locations/place_information", "/tmp/outApplicants.xml"),
    ]

    tf = EventKMLTransformer("/tmp/out.sql")
    try:
        # cnt is carried over from one run to the next so the ids written to
        # the SQL file keep increasing across both location types.
        cnt = 0
        for locationXPath, kmlPath in jobs:
            x, cnt = tf.readFiles(eventsDir, locationXPath, cnt)
            evs = tf.toKML(x)

            # "with" closes the output file even if writing fails
            with codecs.open(kmlPath, "w", "utf-8") as out:
                out.write(evs)
    finally:
        # always release the SQL file, also when a run raised
        tf.close()