diff map/coordinates/get_coordinates_from_chgis.php @ 0:b12c99b7c3f0

commit for previous development
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Mon, 19 Jan 2015 17:13:49 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/map/coordinates/get_coordinates_from_chgis.php	Mon Jan 19 17:13:49 2015 +0100
@@ -0,0 +1,330 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+        <head>
+                <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+                <style type="text/css">
+			body{width:2000px;font-size:14px;}
+			table{border-collapse:collapse;}
+                        .row{
+                        }
+			.lastRow{
+				/*border-bottom:1px solid #aaa;*/
+			}
+			.separator{
+				height:1px;
+				background:#aaa;
+			}
+			.coordinateUnfound{
+				background:#F6CECE;
+			}
+			.coordinateOverlapped{
+				background:#CEECF5;
+			}
+                        .column{
+				min-height:20px;
+                                vertical-align:top;
+				border-right:1px solid #aaa;
+				padding:0 5px;
+                        }
+			.level1{width:30px;}
+			.level2{width:60px;}
+			.level1Code{width:46px;}
+			.level2Code{width:4px;}
+			.years{width:68px;}
+			.date{ width:30px;}
+			.title{ width:100px;}
+			.placeName,.name{width:80px;}
+			.bookId{width:80px;}
+			.xyCount{width:20px;}
+			.adminType{ width:20px;}
+			.mapUsed{width:30px;}
+			.chgisId{width:4px;}
+			.beginYear,.endYear{width:30px;}
+			.sysId, .parentSysId{width:70px;}
+			.transcription{width:90px;}
+			.parent{width:90px;}
+			.featureType{width:64px;}
+			.x,.y{width:80px;}
+			.dataSource{width:50px;}
+                </style>
+                <!--<script src="js/check_sections.js" charset="utf-8"></script>!-->
+        </head>
+        <body>
+
+<?php
+// Batch geocoder: for every book in the monograph list, look its place names
+// up in CHGIS and in the local 1820/1911 point files, write one CSV per book
+// plus a summary list CSV and an error CSV, and render everything as an HTML
+// table so the results can be reviewed in the browser.
+set_time_limit(0);
+ini_set('memory_limit', '-1');
+
+// "?list=176" selects the "_176" book list and suffixes the list/error CSVs.
+$fileAppend="";
+if(isset($_GET['list']) && $_GET['list']==176){
+	$fileAppend="_176";
+}
+
+$allCoordinateFromFileArray=loadCoordinateFromFile();
+
+$bookListArray=getBookList();
+// Table header: the eleven book columns followed by the coordinate columns,
+// listed in the same order in which the coordinate cells are emitted below
+// (the 'Address' cell is skipped when rendering).
+$columnName=['level1','level1Code','level2','level2Code','years','date','bookId','title','placeName','xyCount','adminType',
+	'name','beginYear','endYear','x','y','dataSource','sysId','presLoc','adminType','parent','parentSysId','description'];
+$csvColumnName=['Address','Name','TimeSpan:begin', 'TimeSpan:end', 'Longitude', 'Latitude', 'DATA_SOURCE', 'SYS_ID', 'PRES_LOC', 'ADMIN_TYPE', 'PARENT', 'PARENT_SYS_ID', 'Description'];
+
+// CSV column => key in the source row. null means the source has no such
+// value and the column is written as an empty string. The key order must
+// match $csvColumnName because fputcsv() writes values positionally.
+$chgisColumnMapping=['Address'=>'name','Name'=>'name','TimeSpan:begin'=>'beginYear', 'TimeSpan:end'=>'endYear', 'Longitude'=>'x', 'Latitude'=>'y',
+	'DATA_SOURCE'=>'dataSource','SYS_ID'=>'sysId', 'PRES_LOC'=>null, 'ADMIN_TYPE'=>'featureType', 'PARENT'=>'parent', 'PARENT_SYS_ID'=>'parentSysId'];
+$fileColumnMapping=['Address'=>'name','Name'=>'name','TimeSpan:begin'=>'beginYear', 'TimeSpan:end'=>'endYear',
+	'Longitude'=>'x', 'Latitude'=>'y', 'DATA_SOURCE'=>'dataSource','SYS_ID'=>'sysId', 'PRES_LOC'=>'presLoc', 'ADMIN_TYPE'=>'type',
+	'PARENT'=>null, 'PARENT_SYS_ID'=>null];
+
+$bookListFp=fopen('./csv_files/list'.$fileAppend.'.csv','w');
+$errorListFp=fopen('./csv_files/error'.$fileAppend.'.csv','w');
+if($bookListFp===false || $errorListFp===false){
+	// Nothing useful can be produced without the summary files.
+	exit("Cannot open the list/error CSV files in ./csv_files/ for writing.");
+}
+
+echo "<table>";
+echo "<tr class='row'>";
+foreach($columnName as $name){
+	echo "<td class='".$name."'>".$name."</td>";
+}
+echo "</tr>";
+
+foreach($bookListArray as $book){
+	// A book whose period is not of the form "begin-end" is logged. It is
+	// still matched against the local point files, but CHGIS is skipped
+	// because that lookup filters on the year range.
+	$match=array();
+	$hasYears=(preg_match('/([0-9]+)-([0-9]+)/',(string)$book['years'],$match)===1);
+	if(!$hasYears){
+		fputcsv($errorListFp,$book);
+	}
+
+	// placeName may hold several comma-separated names; strtok() drops empty ones.
+	$placeNameArray=array();
+	$tok=strtok((string)$book['placeName'],',');
+	while($tok!==false){
+		$placeNameArray[]=$tok;
+		$tok=strtok(",");
+	}
+	if(sizeof($placeNameArray)==0){
+		continue;
+	}
+
+	$book['bookId']=sprintf("%05d",$book['bookId']);
+	$fp=fopen('./csv_files/'.$book['bookId'].'.csv','w');
+	if($fp===false){
+		fputcsv($errorListFp,$book);
+		continue;
+	}
+	fputcsv($fp,$csvColumnName);
+
+	$allCoordinateArray=array();
+	foreach($placeNameArray as $placeName){
+		$coordinateFromChgisArray=array();
+		if($hasYears){
+			//first, search with the admin type as part of the place name
+			$coordinateFromChgisArray=getCoordinateFromChgis($placeName,$match[1],$match[2]);
+			//if no result is returned, remove the admin type and search again
+			if(sizeof($coordinateFromChgisArray)==0 && (string)$book['adminType']!==''){
+				$placeNameWithoutAdminType=str_replace($book['adminType'],"",$placeName);
+				// only retry when stripping actually produced a different, non-empty name
+				if($placeNameWithoutAdminType!=='' && $placeNameWithoutAdminType!==$placeName){
+					$coordinateFromChgisArray=getCoordinateFromChgis($placeNameWithoutAdminType,$match[1],$match[2]);
+				}
+			}
+		}
+		//get coordinate from the 1820 & 1911 point files
+		$coordinateFromFileArray=getCoordinateFromFile($placeName);
+
+		//normalise both sources into the CSV layout, CHGIS rows first
+		$sources=[
+			[$chgisColumnMapping,$coordinateFromChgisArray],
+			[$fileColumnMapping,$coordinateFromFileArray],
+		];
+		foreach($sources as $source){
+			$mapping=$source[0];
+			foreach($source[1] as $coor){
+				$row=array();
+				foreach($mapping as $newName=>$oldName){
+					// cast so SimpleXML nodes and integers are stored as plain strings
+					$row[$newName]=($oldName===null)?'':(string)$coor[$oldName];
+				}
+				$row['Description']=$row['Name'].' / '.$row['Longitude'].', '.$row['Latitude'].' / '.
+					$row['TimeSpan:begin'].'-'.$row['TimeSpan:end'].' / '.
+					$row['PRES_LOC'].' / '.$row['PARENT'].' / '.$row['ADMIN_TYPE'].' / '.$row['DATA_SOURCE'];
+				fputcsv($fp,$row);
+				$allCoordinateArray[]=$row;
+			}
+		}
+	}//end of foreach place name
+
+	//write to the list file: one summary line, then one line per coordinate
+	$array=[$book['level1'],$book['date'],$book['title'],$book['placeName'],$book['bookId'],sizeof($allCoordinateArray)];
+	fputcsv($bookListFp, $array);
+
+	// books with more than one candidate coordinate are highlighted in blue
+	$style="";
+	if(sizeof($allCoordinateArray)>1){
+		$style="style='background:#ceecf5'";
+	}
+	$lastIndex=sizeof($allCoordinateArray)-1;
+	foreach($allCoordinateArray as $i=>$coordinate){
+		fputcsv($bookListFp,[$coordinate['Longitude'],$coordinate['Latitude']]);
+		// the "#" prefix is only added to the coordinates shown in the HTML table
+		$coordinate['Longitude']="#".$coordinate['Longitude'];
+		$coordinate['Latitude']="#".$coordinate['Latitude'];
+		$class=($i==$lastIndex)?' lastRow':'';
+		echo "<tr class='".$class."' ".$style.">";
+		foreach($book as $idx=>$property){
+			echo "<td class='column ".$idx."'>".htmlspecialchars((string)$property,ENT_QUOTES,'UTF-8')."</td>";
+		}
+		foreach($coordinate as $idx=>$property){
+			if($idx=='Address'){
+				continue;
+			}
+			echo "<td class='column ".$idx."'>".htmlspecialchars((string)$property,ENT_QUOTES,'UTF-8')."</td>";
+		}
+		echo "</tr>";
+	}
+	// books without any coordinate are highlighted in red
+	if(sizeof($allCoordinateArray)==0){
+		echo "<tr class='lastRow' style='background:#F6CECE'>";
+		foreach($book as $idx=>$property){
+			echo "<td class='column ".$idx."'>".htmlspecialchars((string)$property,ENT_QUOTES,'UTF-8')."</td>";
+		}
+		echo "</tr>";
+	}
+	echo "<tr class='separator'><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td></tr>";
+
+	fclose($fp);
+}
+fclose($bookListFp);
+fclose($errorListFp);
+echo "</table>";
+
+
+/**
+ * Reads the tab-separated monograph list "local_monographs_list<suffix>.txt",
+ * where <suffix> is the global $fileAppend.
+ *
+ * The first line is treated as a header and discarded. Blank lines are
+ * skipped and columns missing from a short line are returned as ''.
+ * Input column 7 is not used.
+ *
+ * @return array list of books, each an array with the keys level1,
+ *               level1Code, level2, level2Code, years, date, bookId, title,
+ *               placeName, xyCount, adminType (in that order); empty when
+ *               the file cannot be opened
+ */
+function getBookList(){
+	global $fileAppend;
+	// input column index => key in the returned book array
+	$columns=[0=>'level1',1=>'level1Code',2=>'level2',3=>'level2Code',4=>'years',5=>'date',
+		6=>'bookId',8=>'title',9=>'placeName',10=>'xyCount',11=>'adminType'];
+	$listArray=array();
+	$fp=fopen("local_monographs_list".$fileAppend.".txt","r");
+	if($fp===false){
+		return $listArray;
+	}
+	fgetcsv($fp,1000000,"\t"); // discard the header line
+	// Test the fgetcsv() result instead of feof(): feof() only becomes true
+	// after a read has failed, which used to append a bogus all-null book.
+	while(($data=fgetcsv($fp,1000000,"\t"))!==false){
+		if($data===array(null)){
+			continue; // blank line
+		}
+		$book=array();
+		foreach($columns as $idx=>$name){
+			$book[$name]=isset($data[$idx])?$data[$idx]:'';
+		}
+		$listArray[]=$book;
+	}
+	fclose($fp);
+	return $listArray;
+}
+
+/**
+ * Queries the CHGIS placename service for $placeName and returns the entries
+ * whose validity period overlaps [$beginYear, $endYear] and whose x and y
+ * coordinates both have a non-zero integer part.
+ *
+ * @param string     $placeName place name to search for (sent URL-encoded)
+ * @param int|string $beginYear first year of the period of interest
+ * @param int|string $endYear   last year of the period of interest
+ * @return array list of matches, each with the keys beginYear, endYear,
+ *               sysId, name, transcription, parentSysId, parent,
+ *               featureType, x, y, dataSource; empty when the service
+ *               cannot be reached or returns no usable entry
+ */
+function getCoordinateFromChgis($placeName,$beginYear,$endYear){
+	$array=array();
+	// The name is usually non-ASCII and must be encoded to form a valid URL.
+	$url="http://chgis.hmdc.harvard.edu/placename?n=".urlencode($placeName);
+	$xml=simplexml_load_file($url);
+	if($xml===false || !isset($xml->placenames->placename)){
+		return $array;
+	}
+	$beginYear=intval($beginYear);
+	$endYear=intval($endYear);
+	foreach($xml->placenames->placename as $property){
+		$begin=intval((string)$property->years->begin);
+		$end=intval((string)$property->years->end);
+		$x=(string)$property->{'xy-coordinates'}->x;
+		$y=(string)$property->{'xy-coordinates'}->y;
+		// Interval overlap. Testing only whether one of the requested
+		// endpoints falls inside [begin, end] misses entries whose period
+		// lies strictly inside the requested one.
+		$overlaps=($beginYear<=$end && $endYear>=$begin);
+		if(!$overlaps || intval($x)==0 || intval($y)==0){
+			continue;
+		}
+		// Cast to string so callers get plain values, not SimpleXML nodes.
+		$array[]=[
+			'beginYear'=>$begin,
+			'endYear'=>$end,
+			'sysId'=>(string)$property['sys_id'],
+			'name'=>(string)$property->name,
+			'transcription'=>(string)$property->transcription,
+			'parentSysId'=>(string)$property->parent['sys_id'],
+			'parent'=>(string)$property->parent,
+			'featureType'=>(string)$property->{'feature-type'},
+			'x'=>$x,
+			'y'=>$y,
+			'dataSource'=>(string)$property->{'data-source'},
+		];
+	}
+	return $array;
+}
+
+/**
+ * Loads the tab-separated point files "./1820_1911/<year>_<admin>_pts.txt"
+ * for the years 1820 and 1911 and the admin levels cnty, pref and prov.
+ *
+ * The first line of each file is treated as a header and discarded. Blank
+ * lines are skipped and columns missing from a short line are stored as ''.
+ * A file that cannot be opened contributes empty columns.
+ *
+ * @return array column-oriented data: [year][admin][column][rowIndex], with
+ *               the columns sysId, name, x, y, presLoc, type, beginYear,
+ *               endYear and dataSource (dataSource holds the year)
+ */
+function loadCoordinateFromFile(){
+	$listArray=array();
+	$adminArray=['cnty','pref','prov'];
+	// input column index => column name; the two years use different layouts
+	$columnArray=[
+		1820=>[1=>'sysId',4=>'name',5=>'x',6=>'y',
+			7=>'presLoc',9=>'type',11=>'beginYear',13=>'endYear'],
+		1911=>[1=>'sysId',5=>'name',6=>'x',7=>'y',
+			8=>'presLoc',10=>'type',12=>'beginYear',14=>'endYear'],
+	];
+	foreach($columnArray as $year=>$columns){
+		foreach($adminArray as $admin){
+			// Create every column up front so lookups never hit a missing
+			// key, even when the file is absent or empty.
+			$listArray[$year][$admin]=array();
+			foreach($columns as $column){
+				$listArray[$year][$admin][$column]=array();
+			}
+			$listArray[$year][$admin]['dataSource']=array();
+
+			$fp=fopen("./1820_1911/".$year."_".$admin."_pts.txt","r");
+			if($fp===false){
+				continue;
+			}
+			fgetcsv($fp,1000000,"\t"); // discard the header line
+			// Test the fgetcsv() result instead of feof(): feof() only
+			// becomes true after a read has failed, which used to append
+			// a row of nulls to every column.
+			while(($data=fgetcsv($fp,1000000,"\t"))!==false){
+				if($data===array(null)){
+					continue; // blank line
+				}
+				foreach($columns as $idx=>$column){
+					$listArray[$year][$admin][$column][]=isset($data[$idx])?$data[$idx]:'';
+				}
+				$listArray[$year][$admin]['dataSource'][]=$year;
+			}
+			fclose($fp);
+		}
+	}
+	return $listArray;
+}
+//$array=getCoordinateFromFile("江西");
+//print_r($array);
+/**
+ * Looks $place up in the point-file data held in the global
+ * $allCoordinateFromFileArray (laid out as [year][admin][column][rowIndex]).
+ *
+ * For each of the years 1820 and 1911 the admin levels are tried in the
+ * order prov, pref, cnty; the first level with at least one row whose name
+ * is exactly $place contributes all of its matching rows, and the remaining
+ * levels of that year are not searched.
+ *
+ * @param string $place place name to look up (compared strictly)
+ * @return array list of matching rows, each an array column => value
+ */
+function getCoordinateFromFile($place){
+	global $allCoordinateFromFileArray;
+	$yearArray=[1820,1911];
+	$adminArray=['prov','pref','cnty'];
+	$listArray=array();
+	foreach($yearArray as $year){
+		foreach($adminArray as $admin){
+			if(!isset($allCoordinateFromFileArray[$year][$admin]['name'])){
+				continue;
+			}
+			$columns=$allCoordinateFromFileArray[$year][$admin];
+			// One strict scan: a loose comparison would treat numeric-looking
+			// strings such as "0123" and "123" (or "" and null) as equal.
+			$matches=array_keys($columns['name'],$place,true);
+			if(sizeof($matches)==0){
+				continue;
+			}
+			foreach($matches as $rowIndex){
+				$row=array();
+				foreach($columns as $column=>$values){
+					$row[$column]=isset($values[$rowIndex])?$values[$rowIndex]:'';
+				}
+				$listArray[]=$row;
+			}
+			break; // the first matching admin level wins for this year
+		}
+	}
+	return $listArray;
+}
+?>
+        </body>
+</html>