comparison coordinates/get_coordinates_from_chgis.php @ 0:57bde4830927

first commit
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 24 Mar 2015 11:37:17 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:57bde4830927
1 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2 <html>
3 <head>
4 <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
5 <style type="text/css">
6 body{width:2000px;font-size:14px;}
7 table{border-collapse:collapse;}
8 .row{
9 }
10 .lastRow{
11 /*border-bottom:1px solid #aaa;*/
12 }
13 .separator{
14 height:1px;
15 background:#aaa;
16 }
17 .coordinateUnfound{
18 background:#F6CECE;
19 }
20 .coordinateOverlapped{
21 background:#CEECF5;
22 }
23 .column{
24 min-height:20px;
25 vertical-align:top;
26 border-right:1px solid #aaa;
27 padding:0 5px;
28 }
29 .level1{width:30px;}
30 .level2{width:60px;}
31 .level1Code{width:46px;}
32 .level2Code{width:4px;}
33 .years{width:68px;}
34 .date{ width:30px;}
35 .title{ width:100px;}
36 .placeName,.name{width:80px;}
37 .bookId{width:80px;}
38 .xyCount{width:20px;}
39 .adminType{ width:20px;}
40 .mapUsed{width:30px;}
41 .chgisId{width:4px;}
42 .beginYear,.endYear{width:30px;}
43 .sysId, .parentSysId{width:70px;}
44 .transcription{width:90px;}
45 .parent{width:90px;}
46 .featureType{width:64px;}
47 .x,.y{width:80px;}
48 .dataSource{width:50px;}
49 </style>
50 <!--<script src="js/check_sections.js" charset="utf-8"></script>!-->
51 </head>
52 <body>
53
<?php
/*
 * Batch report: looks up coordinates for every place name of every book in
 * the book list and writes the results both as CSV files and as one HTML table.
 *
 * Outputs (all below ./csv_files/):
 *   - <bookId>.csv            one row per coordinate found for that book
 *   - list<suffix>.csv        one summary row per book, followed by one
 *                             "longitude,latitude" row per coordinate
 *   - error<suffix>.csv       books whose "years" field is not "<begin>-<end>"
 *                             or whose per-book CSV could not be created
 *
 * Coordinates come from two sources: the remote CHGIS place-name service
 * (getCoordinateFromChgis) and the local 1820/1911 point files
 * (getCoordinateFromFile).
 */
set_time_limit(0);
ini_set('memory_limit', '-1');

// "?list=176" selects the "_176" variants of the input list and output files.
$fileAppend="";
if(isset($_GET['list']) && $_GET['list']==='176'){
    $fileAppend="_176";
}

// Read as a global by getCoordinateFromFile().
$allCoordinateFromFileArray=loadCoordinateFromFile();

$bookListArray=getBookList();

// Header labels of the HTML table: the 11 book columns followed by the
// coordinate columns in the same order in which the data cells are printed
// (the CSV columns below, minus "Address").
$columnName=['level1','level1Code','level2','level2Code','years','date','bookId','title','placeName','xyCount','adminType',
    'name','beginYear','endYear','x','y','dataSource','sysId','presLoc','adminType','parent','parentSysId','description'];
$csvColumnName=['Address','Name','TimeSpan:begin', 'TimeSpan:end', 'Longitude', 'Latitude', 'DATA_SOURCE', 'SYS_ID', 'PRES_LOC', 'ADMIN_TYPE', 'PARENT', 'PARENT_SYS_ID', 'Description'];

$bookListFp=fopen('./csv_files/list'.$fileAppend.'.csv','w');
$errorListFp=fopen('./csv_files/error'.$fileAppend.'.csv','w');
if($bookListFp===false || $errorListFp===false){
    // Nothing useful can be produced without the summary files.
    exit("<p>Cannot open the list/error CSV files in ./csv_files/ for writing.</p></body></html>");
}

echo "<table>";
echo "<tr class='row'>";
foreach($columnName as $name){
    echo "<td class='".$name."'>".$name."</td>";
}
echo "</tr>";

foreach($bookListArray as $book){
    // The years field must contain "<begin>-<end>"; without it there is no
    // period to query, so the book is logged and skipped.
    if(!preg_match('/([0-9]+)-([0-9]+)/',(string)$book['years'],$match)){
        fputcsv($errorListFp,$book);
        continue;
    }
    $beginYear=$match[1];
    $endYear=$match[2];

    // Comma-separated place names; empty entries are dropped.
    $placeNameArray=array_values(array_filter(explode(',',(string)$book['placeName']),'strlen'));
    if(sizeof($placeNameArray)==0){
        continue;
    }

    $book['bookId']=sprintf("%05d",$book['bookId']);
    $bookId=$book['bookId'];
    $fp=fopen('./csv_files/'.$bookId.'.csv','w');
    if($fp===false){
        fputcsv($errorListFp,$book);
        continue;
    }
    fputcsv($fp,$csvColumnName);

    $allCoordinateArray=array();
    foreach($placeNameArray as $placeName){
        // First search with the full place name (admin type included).
        $coordinateFromChgisArray=getCoordinateFromChgis($placeName,$beginYear,$endYear);
        // If nothing is returned, remove the admin type and search again.
        // The second request is skipped when stripping changes nothing or
        // leaves an empty name.
        if(sizeof($coordinateFromChgisArray)==0){
            $placeNameWithoutAdminType=str_replace((string)$book['adminType'],"",$placeName);
            if($placeNameWithoutAdminType!=='' && $placeNameWithoutAdminType!==$placeName){
                $coordinateFromChgisArray=getCoordinateFromChgis($placeNameWithoutAdminType,$beginYear,$endYear);
            }
        }
        // Coordinates from the local 1820 & 1911 point files.
        $coordinateFromFileArray=getCoordinateFromFile($placeName);

        // CHGIS rows carry parent information but no present-day location.
        foreach($coordinateFromChgisArray as $coor){
            $row=buildCoordinateRow($coor,'',$coor['featureType'],$coor['parent'],$coor['parentSysId']);
            fputcsv($fp,$row);
            $allCoordinateArray[]=$row;
        }
        // File rows carry a present-day location but no parent information.
        foreach($coordinateFromFileArray as $coor){
            $row=buildCoordinateRow($coor,$coor['presLoc'],$coor['type'],'','');
            fputcsv($fp,$row);
            $allCoordinateArray[]=$row;
        }
    }

    // Summary row for this book, followed below by one row per coordinate.
    fputcsv($bookListFp,[$book['level1'],$book['date'],$book['title'],$book['placeName'],$book['bookId'],sizeof($allCoordinateArray)]);

    // Books with more than one candidate coordinate are highlighted in blue.
    $style="";
    if(sizeof($allCoordinateArray)>1){
        $style="style='background:#ceecf5'";
    }
    $lastIndex=sizeof($allCoordinateArray)-1;
    foreach($allCoordinateArray as $i=>$coordinate){
        fputcsv($bookListFp,[$coordinate['Longitude'],$coordinate['Latitude']]);
        // In the HTML table the coordinates are shown with a leading "#".
        $coordinate['Longitude']="#".$coordinate['Longitude'];
        $coordinate['Latitude']="#".$coordinate['Latitude'];
        $class=($i==$lastIndex)?' lastRow':'';
        echo "<tr class='".$class."' ".$style.">";
        echoCells($book);
        echoCells($coordinate,'Address');
        echo "</tr>";
    }
    // Books without any coordinate are highlighted in red.
    if(sizeof($allCoordinateArray)==0){
        echo "<tr class='lastRow' style='background:#F6CECE'>";
        echoCells($book);
        echo "</tr>";
    }
    echo "<tr class='separator'><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td></tr>";

    fclose($fp);
}
fclose($bookListFp);
fclose($errorListFp);
echo "</table>";

/**
 * Builds one output row (keys in CSV column order) from a coordinate record.
 *
 * $coor must provide 'name', 'beginYear', 'endYear', 'x', 'y', 'dataSource'
 * and 'sysId'; the remaining fields differ per source and are passed in.
 * All values are converted to strings.
 */
function buildCoordinateRow($coor,$presLoc,$adminType,$parent,$parentSysId){
    $name=(string)$coor['name'];
    $x=(string)$coor['x'];
    $y=(string)$coor['y'];
    $begin=(string)$coor['beginYear'];
    $end=(string)$coor['endYear'];
    $dataSource=(string)$coor['dataSource'];
    $presLoc=(string)$presLoc;
    $adminType=(string)$adminType;
    $parent=(string)$parent;

    return [
        'Address'=>$name,
        'Name'=>$name,
        'TimeSpan:begin'=>$begin,
        'TimeSpan:end'=>$end,
        'Longitude'=>$x,
        'Latitude'=>$y,
        'DATA_SOURCE'=>$dataSource,
        'SYS_ID'=>(string)$coor['sysId'],
        'PRES_LOC'=>$presLoc,
        'ADMIN_TYPE'=>$adminType,
        'PARENT'=>$parent,
        'PARENT_SYS_ID'=>(string)$parentSysId,
        'Description'=>$name.' / '.$x.', '.$y.' / '.$begin.'-'.$end.' / '.
            $presLoc.' / '.$parent.' / '.$adminType.' / '.$dataSource,
    ];
}

/**
 * Echoes one <td> per entry of $cells, using the key as an extra CSS class.
 * Values are HTML-escaped because they come from data files and a remote
 * service. The entry whose key equals $skipKey (if given) is not printed.
 */
function echoCells($cells,$skipKey=null){
    foreach($cells as $idx=>$property){
        if($skipKey!==null && $idx===$skipKey){
            continue;
        }
        echo "<td class='column ".$idx."'>".htmlspecialchars((string)$property,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8')."</td>";
    }
}
219
220
/**
 * Reads the tab-separated book list "local_monographs_list<suffix>.txt"
 * (suffix taken from the global $fileAppend) and returns one associative
 * array per data row.
 *
 * The first line is treated as a header and skipped. Blank lines are ignored.
 * Source column 7 is not used. Columns missing from a short row yield null.
 *
 * @return array list of books with the keys level1, level1Code, level2,
 *               level2Code, years, date, bookId, title, placeName, xyCount,
 *               adminType; empty when the file cannot be opened
 */
function getBookList(){
    global $fileAppend;
    $listArray=array();
    $fp=fopen("local_monographs_list".$fileAppend.".txt","r");
    if($fp===false){
        return $listArray;
    }

    // Source column index => field name.
    $columnMap=[0=>'level1',1=>'level1Code',2=>'level2',3=>'level2Code',4=>'years',5=>'date',
        6=>'bookId',8=>'title',9=>'placeName',10=>'xyCount',11=>'adminType'];

    // Skip the header line.
    fgetcsv($fp,100000,"\t");

    // Loop on fgetcsv's own result: testing feof() first would add a bogus
    // all-null book after the final newline.
    while(($data=fgetcsv($fp,1000000,"\t"))!==false){
        if($data===array(null)){
            continue; // blank line
        }
        $book=array();
        foreach($columnMap as $idx=>$field){
            $book[$field]=isset($data[$idx])?$data[$idx]:null;
        }
        $listArray[]=$book;
    }
    fclose($fp);
    return $listArray;
}
244
/**
 * Queries the CHGIS place-name service for $placeName and returns the
 * matching records that have usable coordinates.
 *
 * A record is kept when the book's begin year or end year lies inside the
 * record's [begin, end] period and the integer parts of both x and y are
 * non-zero.
 *
 * @param string     $placeName place name to search for
 * @param int|string $beginYear first year of the book's period
 * @param int|string $endYear   last year of the book's period
 * @return array list of records with the keys beginYear, endYear (int) and
 *               sysId, name, transcription, parentSysId, parent, featureType,
 *               x, y, dataSource (string); empty when the request or the XML
 *               parsing fails
 */
function getCoordinateFromChgis($placeName,$beginYear,$endYear){
    // The name is user data (often non-ASCII), so it must be URL-encoded.
    $url="http://chgis.hmdc.harvard.edu/placename?n=".urlencode($placeName);
    $array=array();
    $xml=simplexml_load_file($url);
    if($xml===false){
        return $array;
    }
    foreach($xml->placenames->placename as $property){
        $begin=intval($property->years->begin);
        $end=intval($property->years->end);
        $x=(string)$property->{'xy-coordinates'}->x;
        $y=(string)$property->{'xy-coordinates'}->y;

        // NOTE(review): a record whose period lies strictly inside the book's
        // period (beginYear < begin and endYear > end) is not matched by this
        // test - confirm whether that is intended.
        $inPeriod=($beginYear>=$begin && $beginYear<=$end) || ($endYear>=$begin && $endYear<=$end);
        // Records with a missing or zero coordinate are discarded.
        $hasCoordinates=intval($x)!=0 && intval($y)!=0;
        if(!$inPeriod || !$hasCoordinates){
            continue;
        }

        // Values are copied out as plain strings so the result does not keep
        // references into the XML document.
        $array[]=[
            'beginYear'=>$begin,
            'endYear'=>$end,
            'sysId'=>(string)$property['sys_id'],
            'name'=>(string)$property->name,
            'transcription'=>(string)$property->transcription,
            'parentSysId'=>isset($property->parent['sys_id'])?(string)$property->parent['sys_id']:'',
            'parent'=>(string)$property->parent,
            'featureType'=>(string)$property->{'feature-type'},
            'x'=>$x,
            'y'=>$y,
            'dataSource'=>(string)$property->{'data-source'},
        ];
    }
    return $array;
}
275
/**
 * Loads the tab-separated point files "./1820_1911/<year>_<admin>_pts.txt"
 * for the years 1820 and 1911 and the levels cnty, pref and prov.
 *
 * The first line of each file is treated as a header and skipped. Blank
 * lines are ignored. Columns missing from a short row yield null.
 *
 * @return array column-oriented table
 *               $result[<year>][<admin>][<column>][<rowIndex>] with the
 *               columns sysId, name, x, y, presLoc, type, beginYear, endYear
 *               and dataSource (the year). Every column list exists, even
 *               when a file is missing or has no data rows.
 */
function loadCoordinateFromFile(){
    $listArray=array();
    $yearArray=[1820,1911];
    $adminArray=['cnty','pref','prov'];
    // Source column index => field name; the two years use different layouts.
    $columnArray=[
        1820=>[1=>'sysId',4=>'name',5=>'x',6=>'y',
            7=>'presLoc',9=>'type',11=>'beginYear',13=>'endYear'],
        1911=>[1=>'sysId',5=>'name',6=>'x',7=>'y',
            8=>'presLoc',10=>'type',12=>'beginYear',14=>'endYear'],
    ];
    foreach($yearArray as $year){
        foreach($adminArray as $admin){
            // Create every column up front so consumers can always search
            // the 'name' column.
            foreach($columnArray[$year] as $column){
                $listArray[$year][$admin][$column]=array();
            }
            $listArray[$year][$admin]['dataSource']=array();

            $fp=fopen("./1820_1911/".$year."_".$admin."_pts.txt","r");
            if($fp===false){
                continue;
            }
            // Skip the header line.
            fgetcsv($fp,100000,"\t");
            // Loop on fgetcsv's own result: testing feof() first would add a
            // bogus all-null row after the final newline.
            while(($data=fgetcsv($fp,1000000,"\t"))!==false){
                if($data===array(null)){
                    continue; // blank line
                }
                foreach($columnArray[$year] as $idx=>$column){
                    $listArray[$year][$admin][$column][]=isset($data[$idx])?$data[$idx]:null;
                }
                $listArray[$year][$admin]['dataSource'][]=$year;
            }
            fclose($fp);
        }
    }
    return $listArray;
}
303 //$array=getCoordinateFromFile("江西");
304 //print_r($array);
/**
 * Looks up $place by exact name in the tables held in the global
 * $allCoordinateFromFileArray (as produced by loadCoordinateFromFile()).
 *
 * For each year (1820, then 1911) the levels are searched in the order
 * prov, pref, cnty; only the first level containing the name contributes
 * rows for that year.
 *
 * @param string $place place name to look up
 * @return array list of rows; each row holds one value per column of the
 *               matching table (sysId, name, x, y, ...)
 */
function getCoordinateFromFile($place){
    global $allCoordinateFromFileArray;
    $place=(string)$place;
    $yearArray=[1820,1911];
    $adminArray=['prov','pref','cnty'];
    $listArray=array();
    foreach($yearArray as $year){
        foreach($adminArray as $admin){
            // Tolerate a year/level that was never loaded.
            if(!isset($allCoordinateFromFileArray[$year][$admin]['name'])){
                continue;
            }
            $columns=$allCoordinateFromFileArray[$year][$admin];
            // Strict comparison: loose matching would treat '' as equal to a
            // null name and numeric-looking strings such as '1e3' and '1000'
            // as the same place.
            $resultArray=array_keys($columns['name'],$place,true);
            if(sizeof($resultArray)==0){
                continue;
            }
            foreach($resultArray as $result){
                $row=array();
                foreach($columns as $idx=>$values){
                    $row[$idx]=isset($values[$result])?$values[$result]:null;
                }
                $listArray[]=$row;
            }
            // The first matching level wins for this year.
            break;
        }
    }
    return $listArray;
}
328 ?>
329 </body>
330 </html>