view coordinates/get_coordinates_from_chgis.php @ 19:3f1800e63c48

new overlay : China 1820 Prefecture (boundaries-only)
author Calvin Yeh <cyeh@mpipw-berlin.mpg.com>
date Thu, 23 Mar 2017 11:13:23 +0100
parents 57bde4830927
children
line wrap: on
line source

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
        <head>
                <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
                <style type="text/css">
			body{width:2000px;font-size:14px;}
			table{border-collapse:collapse;}
                        .row{
                        }
			.lastRow{
				/*border-bottom:1px solid #aaa;*/
			}
			.separator{
				height:1px;
				background:#aaa;
			}
			.coordinateUnfound{
				background:#F6CECE;
			}
			.coordinateOverlapped{
				background:#CEECF5;
			}
                        .column{
				min-height:20px;
                                vertical-align:top;
				border-right:1px solid #aaa;
				padding:0 5px;
                        }
			.level1{width:30px;}
			.level2{width:60px;}
			.level1Code{width:46px;}
			.level2Code{width:4px;}
			.years{width:68px;}
			.date{ width:30px;}
			.title{ width:100px;}
			.placeName,.name{width:80px;}
			.bookId{width:80px;}
			.xyCount{width:20px;}
			.adminType{ width:20px;}
			.mapUsed{width:30px;}
			.chgisId{width:4px;}
			.beginYear,.endYear{width:30px;}
			.sysId, .parentSysId{width:70px;}
			.transcription{width:90px;}
			.parent{width:90px;}
			.featureType{width:64px;}
			.x,.y{width:80px;}
			.dataSource{width:50px;}
                </style>
                <!--<script src="js/check_sections.js" charset="utf-8"></script>!-->
        </head>
        <body>

<?php
// Batch job: for every book in the monograph list, look up coordinates for each
// of its place names (CHGIS web service + local 1820/1911 point files), write
// one CSV per book, append to the list/error CSVs, and render an HTML table.
// The run is long and memory hungry, so both limits are lifted.
set_time_limit(0);
ini_set('memory_limit', '-1');

// "?list=176" selects the "_176" variant of the input list and output files.
$fileAppend="";
if(isset($_GET['list']) && $_GET['list']==176){
	$fileAppend="_176";
}

// Read by getCoordinateFromFile() through "global", so the name must not change.
$allCoordinateFromFileArray=loadCoordinateFromFile();

$bookListArray=getBookList();
// Header cells of the HTML table: the 11 book fields followed by the coordinate fields.
$columnName=['level1','level1Code','level2','level2Code','years','date','bookId','title','placeName','xyCount','adminType', 
	'name','beginYear','endYear','x','y','sysId','presLoc','adminType','parent','parentSysId','dataSource','description'];
// Header row (and column order) of every per-book CSV file.
$csvColumnName=['Address','Name','TimeSpan:begin', 'TimeSpan:end', 'Longitude', 'Latitude', 'DATA_SOURCE', 'SYS_ID', 'PRES_LOC', 'ADMIN_TYPE', 'PARENT', 'PARENT_SYS_ID', 'Description'];

// Escapes a value for HTML output; values may come from the remote CHGIS XML
// or from local text files and must not be trusted to be markup-free.
$escapeHtml=function($value){
	return htmlspecialchars((string)$value,ENT_QUOTES,'UTF-8');
};

// Echoes one <td> per key/value pair, using the key as an extra CSS class.
$renderCells=function($cells) use ($escapeHtml){
	foreach($cells as $idx=>$property){
		echo "<td class='column ".$idx."'>".$escapeHtml($property)."</td>";
	}
};

// Builds one output row whose keys follow $csvColumnName exactly, including
// the human-readable "Description" summary.
$buildRow=function($name,$beginYear,$endYear,$x,$y,$dataSource,$sysId,$presLoc,$adminType,$parent,$parentSysId){
	return [
		'Address'=>$name,
		'Name'=>$name,
		'TimeSpan:begin'=>$beginYear,
		'TimeSpan:end'=>$endYear,
		'Longitude'=>$x,
		'Latitude'=>$y,
		'DATA_SOURCE'=>$dataSource,
		'SYS_ID'=>$sysId,
		'PRES_LOC'=>$presLoc,
		'ADMIN_TYPE'=>$adminType,
		'PARENT'=>$parent,
		'PARENT_SYS_ID'=>$parentSysId,
		'Description'=>$name.' / '.$x.', '.$y.' / '.$beginYear.'-'.$endYear.' / '.
				$presLoc.' / '.$parent.' / '.$adminType.' / '.$dataSource,
	];
};

echo "<table>";
echo "<tr class='row'>";
foreach($columnName as $name){
	echo "<td class='".$name."'>".$name."</td>";	
}
echo "</tr>";
$bookListFp=fopen('./csv_files/list'.$fileAppend.'.csv','w');
$errorListFp=fopen('./csv_files/error'.$fileAppend.'.csv','w');
foreach($bookListArray as $book){
	// Books whose "years" field is not "<begin>-<end>" are recorded in the error list.
	// $match is reset per book so that a failed match can never reuse the
	// previous book's years.
	$match=array();
	$hasYears=preg_match('/([0-9]+)-([0-9]+)/',(string)$book['years'],$match)===1;
	if(!$hasYears){
		fputcsv($errorListFp,$book);
	}

	// placeName is a comma-separated list; empty items are dropped.
	$placeNameArray=preg_split('/,/',(string)$book['placeName'],-1,PREG_SPLIT_NO_EMPTY);
	if(sizeof($placeNameArray)==0){
		// Nothing to look up: the book produces no CSV, list entry or table row.
		continue;
	}

	$book['bookId']=sprintf("%05d",$book['bookId']);
	$bookId=$book['bookId'];
	$fp=fopen('./csv_files/'.$bookId.'.csv','w');
	fputcsv($fp,$csvColumnName);
	
	$allCoordinateArray=array();
	foreach($placeNameArray as $placeName){
		// columns of different sources
		// book:  level1, level1Code, level2, level2Code, years, date, bookId, title, placeName, xyCount, adminType
		// chgis: beginYear, endYear, sysId, name, transcription, parentSysId, parent, featureType, x, y, dataSource
		// file:  sysId, name, x, y, presLoc, type, beginYear, endYear, dataSource

		// CHGIS filters by year, so it is only queried when the book has a valid range.
		$coordinateFromChgisArray=array();
		if($hasYears){
			$beginYear=$match[1];
			$endYear=$match[2];
			// first, search with the admin type as part of the place name
			$coordinateFromChgisArray=getCoordinateFromChgis($placeName,$beginYear,$endYear);
			// if no result is returned, remove the admin type and search again
			if(sizeof($coordinateFromChgisArray)==0){
				$placeNameWithoutAdminType=str_replace((string)$book['adminType'],"",$placeName);
				// Skip the retry when stripping changed nothing (identical request)
				// or left nothing to search for.
				if($placeNameWithoutAdminType!=='' && $placeNameWithoutAdminType!==$placeName){
					$coordinateFromChgisArray=getCoordinateFromChgis($placeNameWithoutAdminType,$beginYear,$endYear);
				}
			}
		}
		// get coordinates from the 1820 & 1911 point files
		$coordinateFromFileArray=getCoordinateFromFile($placeName);

		// merge both sources into rows of the common CSV layout: CHGIS rows first
		// (no PRES_LOC), then file rows (no PARENT / PARENT_SYS_ID)
		$coordinateArray=array();
		foreach($coordinateFromChgisArray as $coor){
			$coordinateArray[]=$buildRow($coor['name'],$coor['beginYear'],$coor['endYear'],$coor['x'],$coor['y'],
				$coor['dataSource'],$coor['sysId'],'',$coor['featureType'],$coor['parent'],$coor['parentSysId']);
		}
		foreach($coordinateFromFileArray as $coor){
			$coordinateArray[]=$buildRow($coor['name'],$coor['beginYear'],$coor['endYear'],$coor['x'],$coor['y'],
				$coor['dataSource'],$coor['sysId'],$coor['presLoc'],$coor['type'],'','');
		}

		foreach($coordinateArray as $coor){
			fputcsv($fp,$coor);
			$allCoordinateArray[]=$coor;
		}
	}//end of foreach place name

	// write the book summary line to the list file
	$coordinateCount=sizeof($allCoordinateArray);
	fputcsv($bookListFp,[$book['level1'],$book['date'],$book['title'],$book['placeName'],$book['bookId'],$coordinateCount]);

	// books with more than one coordinate are highlighted in blue
	$style=$coordinateCount>1 ? "style='background:#ceecf5'" : "";
	foreach($allCoordinateArray as $i=>$coordinate){
		// one "longitude,latitude" line per coordinate follows the summary line
		fputcsv($bookListFp,[$coordinate['Longitude'],$coordinate['Latitude']]);
		// in the HTML table the coordinates are shown with a leading "#"
		$coordinate['Longitude']="#".$coordinate['Longitude'];
		$coordinate['Latitude']="#".$coordinate['Latitude'];
		unset($coordinate['Address']); // Address duplicates Name and is not displayed
		$class=($i==$coordinateCount-1) ? 'lastRow' : '';
		echo "<tr class='".$class."' ".$style.">";
		$renderCells($book);
		$renderCells($coordinate);
		echo "</tr>";
	}
	if($coordinateCount==0){
		// no coordinate found: show the book alone, highlighted in red
		echo "<tr class='lastRow' style='background:#F6CECE'>";
		$renderCells($book);
		echo "</tr>";
	}
	echo "<tr class='separator'><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td></tr>";

	fclose($fp);
}
fclose($bookListFp);
fclose($errorListFp);
echo "</table>";


/**
 * Reads the tab-separated monograph list "local_monographs_list<suffix>.txt",
 * where <suffix> is the global $fileAppend, and returns one entry per data line.
 *
 * The first line of the file is treated as a header and discarded. Blank lines
 * are skipped. Fields missing from a short line are returned as null.
 *
 * @return array list of books; each book is an array with the keys level1,
 *               level1Code, level2, level2Code, years, date, bookId, title,
 *               placeName, xyCount, adminType (in that order). Empty when the
 *               file cannot be opened.
 */
function getBookList(){
	global $fileAppend;
	// source column index => field name (source column 7 is not used)
	$columnMap=[0=>'level1',1=>'level1Code',2=>'level2',3=>'level2Code',4=>'years',5=>'date',
		6=>'bookId',8=>'title',9=>'placeName',10=>'xyCount',11=>'adminType'];
	$listArray=array();

	$path="local_monographs_list".$fileAppend.".txt";
	$fp=fopen($path,"r");
	if($fp===false){
		// Without this guard feof(false) never becomes true and the loop never ends.
		trigger_error("Cannot open book list file: ".$path,E_USER_WARNING);
		return $listArray;
	}

	// skip the header line
	fgetcsv($fp,100000,"\t");
	// Loop on fgetcsv()'s own result: feof() only turns true after a failed read,
	// which would append one bogus all-null book after the last line.
	while(($data=fgetcsv($fp,1000000,"\t"))!==false){
		if($data===array(null)){
			continue; // blank line
		}
		$book=array();
		foreach($columnMap as $idx=>$field){
			$book[$field]=isset($data[$idx]) ? $data[$idx] : null;
		}
		$listArray[]=$book;
	}
	fclose($fp);
	return $listArray;
}

/**
 * Queries the CHGIS placename web service for $placeName and returns the
 * placenames that have non-zero coordinates and whose validity period
 * contains the begin year or the end year of the requested range.
 *
 * @param string     $placeName place name to search for (sent URL-encoded)
 * @param int|string $beginYear first year of the requested range
 * @param int|string $endYear   last year of the requested range
 * @return array list of matches; each match has the keys beginYear, endYear
 *               (ints) and sysId, name, transcription, parentSysId, parent,
 *               featureType, x, y, dataSource (SimpleXMLElement nodes, usable
 *               as strings). Empty when the service cannot be read.
 */
function getCoordinateFromChgis($placeName,$beginYear,$endYear){
	// The name is typically non-ASCII and may contain reserved characters,
	// so it must be encoded before being placed in the query string.
	$url="http://chgis.hmdc.harvard.edu/placename?n=".urlencode($placeName);
	$array=array();
	$xml=simplexml_load_file($url);
	// simplexml_load_file() returns false on network or parse errors.
	if($xml===false || !isset($xml->placenames->placename)){
		return $array;
	}
	foreach($xml->placenames->placename as $property){
		$begin=intval($property->years->begin);
		$end=intval($property->years->end);
		$x=$property->{'xy-coordinates'}->x;
		$y=$property->{'xy-coordinates'}->y;
		// NOTE(review): this accepts a placename only when the requested begin or
		// end year lies inside [$begin,$end]; a placename whose period lies strictly
		// inside the requested range is rejected. Confirm whether a full interval
		// overlap test ($beginYear<=$end && $endYear>=$begin) is what is wanted.
		$yearMatches=($beginYear>=$begin && $beginYear<=$end) || ($endYear>=$begin && $endYear<=$end);
		$hasCoordinates=intval($x)!=0 && intval($y)!=0;
		if(!$yearMatches || !$hasCoordinates){
			continue;
		}
		$array[]=[
			'beginYear'=>$begin,
			'endYear'=>$end,
			'sysId'=>$property->attributes()->sys_id,
			'name'=>$property->name,
			'transcription'=>$property->transcription,
			// a placename without <parent> has no parent id to read
			'parentSysId'=>isset($property->parent) ? $property->parent->attributes()->sys_id : '',
			'parent'=>$property->parent,
			'featureType'=>$property->{'feature-type'},
			'x'=>$x,
			'y'=>$y,
			'dataSource'=>$property->{'data-source'},
		];
	}
	return $array;
}

/**
 * Loads the tab-separated point files "./1820_1911/<year>_<admin>_pts.txt"
 * for the years 1820 and 1911 and the admin levels cnty, pref and prov.
 *
 * The first line of each file is treated as a header and discarded; blank
 * lines are skipped. Data is stored column-wise: every field is a list, and
 * the same index across all fields of one year/admin level is one record.
 *
 * @return array $result[<year>][<admin>][<field>][<n>], where <field> is one
 *               of sysId, name, x, y, presLoc, type, beginYear, endYear,
 *               dataSource (dataSource is the year as an int). Every field
 *               key is present even when a file is empty or cannot be opened.
 */
function loadCoordinateFromFile(){
	$yearArray=[1820,1911];
	$adminArray=['cnty','pref','prov'];
	// source column index => field name; the layout differs between the two years
	$columnArray=[
		1820=>[1=>'sysId',4=>'name',5=>'x',6=>'y',
			7=>'presLoc',9=>'type',11=>'beginYear',13=>'endYear'],
		1911=>[1=>'sysId',5=>'name',6=>'x',7=>'y',
			8=>'presLoc',10=>'type',12=>'beginYear',14=>'endYear'],
	];
	$listArray=array();
	foreach($yearArray as $year){
		$fieldNames=array_values($columnArray[$year]);
		$fieldNames[]='dataSource';
		foreach($adminArray as $admin){
			// Pre-create every column so consumers can rely on the keys existing.
			$listArray[$year][$admin]=array_fill_keys($fieldNames,array());

			$path="./1820_1911/".$year."_".$admin."_pts.txt";
			$fp=fopen($path,"r");
			if($fp===false){
				// Without this guard feof(false) never becomes true and the loop never ends.
				trigger_error("Cannot open coordinate file: ".$path,E_USER_WARNING);
				continue;
			}
			// skip the header line
			fgetcsv($fp,100000,"\t");
			// Loop on fgetcsv()'s own result: feof() only turns true after a failed
			// read, which would append one bogus all-null record per file.
			while(($data=fgetcsv($fp,1000000,"\t"))!==false){
				if($data===array(null)){
					continue; // blank line
				}
				// Append to every column in the same iteration so indexes stay aligned.
				foreach($columnArray[$year] as $idx=>$column){
					$listArray[$year][$admin][$column][]=isset($data[$idx]) ? $data[$idx] : null;
				}
				$listArray[$year][$admin]['dataSource'][]=$year;
			}
			fclose($fp);
		}
	}
	return $listArray;
}
//$array=getCoordinateFromFile("江西");
//print_r($array);
/**
 * Looks up $place by exact name in the global $allCoordinateFromFileArray
 * (the structure built by loadCoordinateFromFile()).
 *
 * For each year (1820, then 1911) the admin levels are tried in the order
 * prov, pref, cnty; only the first level with at least one match contributes
 * records for that year, and all of its matching records are returned.
 *
 * @param string $place place name to look up
 * @return array list of records; each record maps every field stored for that
 *               year/admin level (sysId, name, x, y, ...) to its value
 */
function getCoordinateFromFile($place){
	global $allCoordinateFromFileArray;
	$yearArray=[1820,1911];
	$adminArray=['prov','pref','cnty'];
	$listArray=array();
	foreach($yearArray as $year){
		foreach($adminArray as $admin){
			if(!isset($allCoordinateFromFileArray[$year][$admin]['name'])){
				continue; // nothing loaded for this year/level
			}
			$columns=$allCoordinateFromFileArray[$year][$admin];
			// Strict comparison: loose matching would treat numeric-looking names as
			// equal ("1e3" == "1000") and null as equal to "".
			$resultArray=array_keys($columns['name'],(string)$place,true);
			if(sizeof($resultArray)==0){
				continue;
			}
			foreach($resultArray as $result){
				// The data is stored column-wise; collect index $result of every column.
				$record=array();
				foreach($columns as $field=>$values){
					$record[$field]=isset($values[$result]) ? $values[$result] : null;
				}
				$listArray[]=$record;
			}
			// lower admin levels of the same year are not searched once one matched
			break;
		}
	}
	return $listArray;
}
?>
        </body>
</html>