Mercurial > hg > extraction-interface
comparison map/coordinates/get_coordinates_from_chgis.php @ 0:b12c99b7c3f0
commit for previous development
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Mon, 19 Jan 2015 17:13:49 +0100 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b12c99b7c3f0 |
|---|---|
| 1 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |
| 2 <html> | |
| 3 <head> | |
| 4 <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> | |
| 5 <style type="text/css"> | |
| 6 body{width:2000px;font-size:14px;} | |
| 7 table{border-collapse:collapse;} | |
| 8 .row{ | |
| 9 } | |
| 10 .lastRow{ | |
| 11 /*border-bottom:1px solid #aaa;*/ | |
| 12 } | |
| 13 .separator{ | |
| 14 height:1px; | |
| 15 background:#aaa; | |
| 16 } | |
| 17 .coordinateUnfound{ | |
| 18 background:#F6CECE; | |
| 19 } | |
| 20 .coordinateOverlapped{ | |
| 21 background:#CEECF5; | |
| 22 } | |
| 23 .column{ | |
| 24 min-height:20px; | |
| 25 vertical-align:top; | |
| 26 border-right:1px solid #aaa; | |
| 27 padding:0 5px; | |
| 28 } | |
| 29 .level1{width:30px;} | |
| 30 .level2{width:60px;} | |
| 31 .level1Code{width:46px;} | |
| 32 .level2Code{width:4px;} | |
| 33 .years{width:68px;} | |
| 34 .date{ width:30px;} | |
| 35 .title{ width:100px;} | |
| 36 .placeName,.name{width:80px;} | |
| 37 .bookId{width:80px;} | |
| 38 .xyCount{width:20px;} | |
| 39 .adminType{ width:20px;} | |
| 40 .mapUsed{width:30px;} | |
| 41 .chgisId{width:4px;} | |
| 42 .beginYear,.endYear{width:30px;} | |
| 43 .sysId, .parentSysId{width:70px;} | |
| 44 .transcription{width:90px;} | |
| 45 .parent{width:90px;} | |
| 46 .featureType{width:64px;} | |
| 47 .x,.y{width:80px;} | |
| 48 .dataSource{width:50px;} | |
| 49 </style> | |
| 50 <!--<script src="js/check_sections.js" charset="utf-8"></script>!--> | |
| 51 </head> | |
| 52 <body> | |
| 53 | |
<?php
/*
 * Page script.
 *
 * For every book in the monograph list this script
 *   1. splits the book's place-name field on commas,
 *   2. looks each place name up in the CHGIS web service (filtered by the
 *      book's year range) and in the local 1820/1911 point files,
 *   3. writes the merged result to ./csv_files/<bookId>.csv,
 *   4. appends a summary line plus one "longitude,latitude" line per result
 *      to ./csv_files/list<suffix>.csv,
 *   5. echoes one HTML table row per result (or one highlighted row when
 *      nothing was found).
 * Books whose "years" field contains no "<digits>-<digits>" range are also
 * logged to ./csv_files/error<suffix>.csv.
 */
set_time_limit(0);
ini_set('memory_limit', '-1');

// "?list=176" switches to the alternative input/output files with the "_176" suffix.
$fileAppend="";
if(isset($_GET['list']) && $_GET['list']==='176'){
    $fileAppend="_176";
}

$allCoordinateFromFileArray=loadCoordinateFromFile();
$bookListArray=getBookList();

// Header labels of the HTML table: the 11 book columns followed by the 12
// coordinate columns. The coordinate labels are listed in the order in which
// the coordinate cells are actually emitted below (the CSV column order
// without "Address"), so that every label sits above its own data.
$columnName=['level1','level1Code','level2','level2Code','years','date','bookId','title','placeName','xyCount','adminType',
    'name','beginYear','endYear','x','y','dataSource','sysId','presLoc','adminType','parent','parentSysId','description'];
$csvColumnName=['Address','Name','TimeSpan:begin', 'TimeSpan:end', 'Longitude', 'Latitude', 'DATA_SOURCE', 'SYS_ID', 'PRES_LOC', 'ADMIN_TYPE', 'PARENT', 'PARENT_SYS_ID', 'Description'];

echo "<table>";
echo "<tr class='row'>";
foreach($columnName as $name){
    echo "<td class='".$name."'>".$name."</td>";
}
echo "</tr>";

$bookListPath='./csv_files/list'.$fileAppend.'.csv';
$errorListPath='./csv_files/error'.$fileAppend.'.csv';
$bookListFp=fopen($bookListPath,'w');
$errorListFp=fopen($errorListPath,'w');
if($bookListFp===false || $errorListFp===false){
    throw new RuntimeException('Cannot open '.$bookListPath.' or '.$errorListPath.' for writing');
}

foreach($bookListArray as $book){
    // The year range is needed to filter the CHGIS results; books without a
    // parsable range are reported in the error list.
    $hasYears=preg_match('/([0-9]+)-([0-9]+)/',(string)$book['years'],$match)===1;
    if(!$hasYears){
        fputcsv($errorListFp,$book);
    }

    // Split on commas, ignoring empty pieces (same result as the former strtok() loop).
    $placeNameArray=preg_split('/,/',(string)$book['placeName'],-1,PREG_SPLIT_NO_EMPTY);
    if(count($placeNameArray)===0){
        // A book without any place name is skipped entirely.
        continue;
    }

    $book['bookId']=sprintf("%05d",$book['bookId']);
    $bookId=$book['bookId'];
    $bookCsvPath='./csv_files/'.$bookId.'.csv';
    $fp=fopen($bookCsvPath,'w');
    if($fp===false){
        throw new RuntimeException('Cannot open '.$bookCsvPath.' for writing');
    }
    fputcsv($fp,$csvColumnName);

    $allCoordinateArray=array();
    foreach($placeNameArray as $placeName){
        // Without a year range the CHGIS results cannot be filtered, so the
        // web service is only queried when the range could be parsed.
        $coordinateFromChgisArray=array();
        if($hasYears){
            $beginYear=$match[1];
            $endYear=$match[2];
            // first, search with the admin type as part of the place name
            $coordinateFromChgisArray=getCoordinateFromChgis($placeName,$beginYear,$endYear);
            // if no result is returned, remove the admin type and search again
            if(count($coordinateFromChgisArray)===0){
                $placeNameWithoutAdminType=str_replace((string)$book['adminType'],"",$placeName);
                // Repeating the identical query, or querying an empty name, cannot help.
                if($placeNameWithoutAdminType!=='' && $placeNameWithoutAdminType!==$placeName){
                    $coordinateFromChgisArray=getCoordinateFromChgis($placeNameWithoutAdminType,$beginYear,$endYear);
                }
            }
        }
        // get coordinates from the 1820 & 1911 files
        $coordinateFromFileArray=getCoordinateFromFile($placeName);

        // columns of the different sources
        // book:  level1, level1Code, level2, level2Code, years, date, bookId, title, placeName, xyCount, adminType
        // chgis: beginYear, endYear, sysId, name, transcription, parentSysId, parent, featureType, x, y, dataSource
        // file:  sysId, name, x, y, presLoc, type, beginYear, endYear, dataSource

        // Merge both sources into rows whose keys follow $csvColumnName order,
        // because fputcsv() writes the values positionally.
        $coordinateArray=array();
        foreach($coordinateFromChgisArray as $coor){
            // CHGIS results carry no present-day location.
            $presLoc='';
            $coordinateArray[]=[
                'Address'=>$coor['name'],
                'Name'=>$coor['name'],
                'TimeSpan:begin'=>$coor['beginYear'],
                'TimeSpan:end'=>$coor['endYear'],
                'Longitude'=>$coor['x'],
                'Latitude'=>$coor['y'],
                'DATA_SOURCE'=>$coor['dataSource'],
                'SYS_ID'=>$coor['sysId'],
                'PRES_LOC'=>$presLoc,
                'ADMIN_TYPE'=>$coor['featureType'],
                'PARENT'=>$coor['parent'],
                'PARENT_SYS_ID'=>$coor['parentSysId'],
                'Description'=>implode(' / ',[
                    $coor['name'],
                    $coor['x'].', '.$coor['y'],
                    $coor['beginYear'].'-'.$coor['endYear'],
                    $presLoc,
                    $coor['parent'],
                    $coor['featureType'],
                    $coor['dataSource'],
                ]),
            ];
        }
        foreach($coordinateFromFileArray as $coor){
            // The point files carry no parent information.
            $parent='';
            $coordinateArray[]=[
                'Address'=>$coor['name'],
                'Name'=>$coor['name'],
                'TimeSpan:begin'=>$coor['beginYear'],
                'TimeSpan:end'=>$coor['endYear'],
                'Longitude'=>$coor['x'],
                'Latitude'=>$coor['y'],
                'DATA_SOURCE'=>$coor['dataSource'],
                'SYS_ID'=>$coor['sysId'],
                'PRES_LOC'=>$coor['presLoc'],
                'ADMIN_TYPE'=>$coor['type'],
                'PARENT'=>$parent,
                'PARENT_SYS_ID'=>'',
                'Description'=>implode(' / ',[
                    $coor['name'],
                    $coor['x'].', '.$coor['y'],
                    $coor['beginYear'].'-'.$coor['endYear'],
                    $coor['presLoc'],
                    $parent,
                    $coor['type'],
                    $coor['dataSource'],
                ]),
            ];
        }

        foreach($coordinateArray as $coor){
            fputcsv($fp,$coor);
            $allCoordinateArray[]=$coor;
        }
    }//end of foreach place name

    //write to the list file
    $coordinateCount=count($allCoordinateArray);
    fputcsv($bookListFp,[$book['level1'],$book['date'],$book['title'],$book['placeName'],$book['bookId'],$coordinateCount]);

    // More than one candidate coordinate: highlight all rows of this book.
    $style=$coordinateCount>1 ? "style='background:#ceecf5'" : "";
    foreach($allCoordinateArray as $i=>$coordinate){
        fputcsv($bookListFp,[$coordinate['Longitude'],$coordinate['Latitude']]);
        // In the HTML view the coordinates are shown with a leading "#".
        $coordinate['Longitude']="#".$coordinate['Longitude'];
        $coordinate['Latitude']="#".$coordinate['Latitude'];
        $class=($i==$coordinateCount-1) ? ' lastRow' : '';
        echo "<tr class='".$class."' ".$style.">";
        foreach($book as $idx=>$property){
            // Values come from data files / a remote service: escape them for HTML.
            echo "<td class='column ".$idx."'>".htmlspecialchars((string)$property,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8')."</td>";
        }
        foreach($coordinate as $idx=>$property){
            if($idx=='Address'){
                // "Address" duplicates "Name" and is only needed in the CSV.
                continue;
            }
            echo "<td class='column ".$idx."'>".htmlspecialchars((string)$property,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8')."</td>";
        }
        echo "</tr>";
    }
    if($coordinateCount==0){
        // Nothing found for this book: show the book columns only, highlighted.
        echo "<tr class='lastRow' style='background:#F6CECE'>";
        foreach($book as $idx=>$property){
            echo "<td class='column ".$idx."'>".htmlspecialchars((string)$property,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8')."</td>";
        }
        echo "</tr>";
    }
    // Thin separator line between books: one empty cell per table column.
    echo "<tr class='separator'>".str_repeat("<td>",count($columnName))."</tr>";

    fclose($fp);
}
fclose($bookListFp);
fclose($errorListFp);
echo "</table>";
| 219 | |
| 220 | |
/**
 * Read the book list from the tab-separated file
 * "local_monographs_list<suffix>.txt" in the current directory, where
 * <suffix> is the global $fileAppend.
 *
 * The first line of the file is treated as a header and skipped; blank lines
 * are ignored. Column 7 of the file is not used.
 *
 * @return array List of books; each book is an array with the keys level1,
 *               level1Code, level2, level2Code, years, date, bookId, title,
 *               placeName, xyCount and adminType. A key is null when the
 *               line has too few columns.
 * @throws RuntimeException when the file cannot be opened.
 */
function getBookList(){
    global $fileAppend;
    $path="local_monographs_list".$fileAppend.".txt";
    $fp=fopen($path,"r");
    if($fp===false){
        // Without this check feof(false) never becomes true on PHP < 8 (endless loop).
        throw new RuntimeException("Cannot open book list file: ".$path);
    }

    // result key => column index in the file
    $columnIndex=[
        'level1'=>0,
        'level1Code'=>1,
        'level2'=>2,
        'level2Code'=>3,
        'years'=>4,
        'date'=>5,
        'bookId'=>6,
        'title'=>8,
        'placeName'=>9,
        'xyCount'=>10,
        'adminType'=>11,
    ];

    // Enclosure and escape are passed explicitly with their historical default values.
    fgetcsv($fp,100000,"\t",'"',"\\"); // header line

    $listArray=array();
    // Loop on the fgetcsv() result instead of feof(): feof() only turns true
    // after a read has already failed, which used to append a bogus last row.
    while(($data=fgetcsv($fp,1000000,"\t",'"',"\\"))!==false){
        if($data===array(null)){
            continue; // blank line
        }
        $book=array();
        foreach($columnIndex as $key=>$idx){
            $book[$key]=isset($data[$idx]) ? $data[$idx] : null;
        }
        $listArray[]=$book;
    }
    fclose($fp);
    return $listArray;
}
| 244 | |
/**
 * Look a place name up in the CHGIS place-name web service and return the
 * entries that fall into the given year range and have usable coordinates.
 *
 * An entry is kept when $beginYear or $endYear lies inside the entry's own
 * [begin, end] range and the integer parts of both coordinates are non-zero.
 * NOTE(review): an entry whose whole validity range lies strictly inside
 * ($beginYear, $endYear) is therefore NOT returned - confirm whether a full
 * interval-overlap test was intended.
 *
 * @param string     $placeName Name to search for.
 * @param int|string $beginYear First year of the range of interest.
 * @param int|string $endYear   Last year of the range of interest.
 * @return array List of entries; each entry has the keys beginYear, endYear
 *               (int) and sysId, name, transcription, parentSysId, parent,
 *               featureType, x, y, dataSource (string). Empty when the
 *               service cannot be reached, returns unparsable XML, or has
 *               no matching entry.
 */
function getCoordinateFromChgis($placeName,$beginYear,$endYear){
    // The name is typically non-ASCII and may contain characters that are
    // special inside a query string, so it has to be percent-encoded.
    $url="http://chgis.hmdc.harvard.edu/placename?n=".rawurlencode((string)$placeName);

    $xml=simplexml_load_file($url);
    if($xml===false || !isset($xml->placenames->placename)){
        // Request failed, XML malformed, or no <placename> elements at all.
        return array();
    }

    $array=array();
    foreach($xml->placenames->placename as $property){
        $begin=intval($property->years->begin);
        $end=intval($property->years->end);
        $x=(string)$property->{'xy-coordinates'}->x;
        $y=(string)$property->{'xy-coordinates'}->y;

        $beginInRange=($beginYear>=$begin && $beginYear<=$end);
        $endInRange=($endYear>=$begin && $endYear<=$end);
        $hasCoordinates=(intval($x)!=0 && intval($y)!=0);
        if(!($beginInRange || $endInRange) || !$hasCoordinates){
            continue;
        }

        // Values are cast to plain strings so callers do not receive live
        // SimpleXMLElement nodes.
        $hasParent=isset($property->parent);
        $array[]=[
            'beginYear'=>$begin,
            'endYear'=>$end,
            'sysId'=>(string)$property['sys_id'],
            'name'=>(string)$property->name,
            'transcription'=>(string)$property->transcription,
            'parentSysId'=>$hasParent ? (string)$property->parent['sys_id'] : '',
            'parent'=>$hasParent ? (string)$property->parent : '',
            'featureType'=>(string)$property->{'feature-type'},
            'x'=>$x,
            'y'=>$y,
            'dataSource'=>(string)$property->{'data-source'},
        ];
    }
    return $array;
}
| 275 | |
/**
 * Load the six tab-separated point files
 * "./1820_1911/<year>_<admin>_pts.txt" (year: 1820, 1911; admin: cnty, pref,
 * prov) into memory.
 *
 * The first line of every file is treated as a header and skipped; blank
 * lines are ignored. The two years use different column positions.
 *
 * @return array Column-oriented data: $result[year][admin][column][rowNumber],
 *               with the columns sysId, name, x, y, presLoc, type, beginYear,
 *               endYear and dataSource (dataSource is the year as an int).
 *               Every column exists, as an empty array, even when a file has
 *               no data lines.
 * @throws RuntimeException when one of the files cannot be opened.
 */
function loadCoordinateFromFile(){
    $yearArray=[1820,1911];
    $adminArray=['cnty','pref','prov'];
    // column index in the file => column name, per year
    $columnArray=[
        1820=>[1=>'sysId',4=>'name',5=>'x',6=>'y',
               7=>'presLoc',9=>'type',11=>'beginYear',13=>'endYear'],
        1911=>[1=>'sysId',5=>'name',6=>'x',7=>'y',
               8=>'presLoc',10=>'type',12=>'beginYear',14=>'endYear'],
    ];

    $listArray=array();
    foreach($yearArray as $year){
        foreach($adminArray as $admin){
            $path="./1820_1911/".$year."_".$admin."_pts.txt";
            $fp=fopen($path,"r");
            if($fp===false){
                // Without this check feof(false) never becomes true on PHP < 8 (endless loop).
                throw new RuntimeException("Cannot open coordinate file: ".$path);
            }

            // Create all columns up front so that lookups also work for a file without data lines.
            $columns=array();
            foreach($columnArray[$year] as $column){
                $columns[$column]=array();
            }
            $columns['dataSource']=array();

            // Enclosure and escape are passed explicitly with their historical default values.
            fgetcsv($fp,100000,"\t",'"',"\\"); // header line

            // Loop on the fgetcsv() result instead of feof(): feof() only turns
            // true after a read has already failed, which used to append an
            // all-null row at the end.
            while(($data=fgetcsv($fp,1000000,"\t",'"',"\\"))!==false){
                if($data===array(null)){
                    continue; // blank line
                }
                foreach($columnArray[$year] as $idx=>$column){
                    $columns[$column][]=isset($data[$idx]) ? $data[$idx] : null;
                }
                $columns['dataSource'][]=$year;
            }
            fclose($fp);

            $listArray[$year][$admin]=$columns;
        }
    }
    return $listArray;
}
| 303 //$array=getCoordinateFromFile("江西"); | |
| 304 //print_r($array); | |
/**
 * Find a place name in the preloaded 1820/1911 point data (global
 * $allCoordinateFromFileArray, laid out as [year][admin][column][rowNumber]).
 *
 * For each year the administrative levels are tried in the order prov, pref,
 * cnty; only the first level that contains the name contributes rows for
 * that year. All rows with that exact name at that level are returned.
 *
 * @param string $place Place name to look for (compared as an exact string).
 * @return array List of matching rows; each row maps every column stored for
 *               that year/level to the value of the matching row. Empty when
 *               the name is not found.
 */
function getCoordinateFromFile($place){
    global $allCoordinateFromFileArray;
    $yearArray=[1820,1911];
    $adminArray=['prov','pref','cnty'];
    $needle=(string)$place;

    $listArray=array();
    foreach($yearArray as $year){
        foreach($adminArray as $admin){
            if(!isset($allCoordinateFromFileArray[$year][$admin]['name'])){
                // No data loaded for this year/level.
                continue;
            }
            $columns=$allCoordinateFromFileArray[$year][$admin];
            // Strict matching: loose comparison would treat numeric-looking
            // strings such as '1e3' and '1000' as the same name.
            $rowNumbers=array_keys($columns['name'],$needle,true);
            if(count($rowNumbers)===0){
                continue;
            }
            foreach($rowNumbers as $rowNumber){
                $row=array();
                foreach($columns as $column=>$values){
                    $row[$column]=isset($values[$rowNumber]) ? $values[$rowNumber] : null;
                }
                $listArray[]=$row;
            }
            // The highest level that knows the name wins for this year.
            break;
        }
    }
    return $listArray;
}
| 328 ?> | |
| 329 </body> | |
| 330 </html> |
