Mercurial > hg > extraction-interface
diff map/coordinates/get_coordinates_from_chgis.php @ 0:b12c99b7c3f0
commit for previous development
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 19 Jan 2015 17:13:49 +0100 |
parents | |
children |
line wrap: on
line diff
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
    <style type="text/css">
      body{width:2000px;font-size:14px;}
      table{border-collapse:collapse;}
      .row{
      }
      .lastRow{
        /*border-bottom:1px solid #aaa;*/
      }
      .separator{
        height:1px;
        background:#aaa;
      }
      .coordinateUnfound{
        background:#F6CECE;
      }
      .coordinateOverlapped{
        background:#CEECF5;
      }
      .column{
        min-height:20px;
        vertical-align:top;
        border-right:1px solid #aaa;
        padding:0 5px;
      }
      .level1{width:30px;}
      .level2{width:60px;}
      .level1Code{width:46px;}
      .level2Code{width:4px;}
      .years{width:68px;}
      .date{ width:30px;}
      .title{ width:100px;}
      .placeName,.name{width:80px;}
      .bookId{width:80px;}
      .xyCount{width:20px;}
      .adminType{ width:20px;}
      .mapUsed{width:30px;}
      .chgisId{width:4px;}
      .beginYear,.endYear{width:30px;}
      .sysId, .parentSysId{width:70px;}
      .transcription{width:90px;}
      .parent{width:90px;}
      .featureType{width:64px;}
      .x,.y{width:80px;}
      .dataSource{width:50px;}
    </style>
    <!--<script src="js/check_sections.js" charset="utf-8"></script>!-->
  </head>
  <body>

<?php
/*
 * For every book in local_monographs_list[_176].txt this script looks up the
 * coordinates of each of the book's place names in two sources:
 *   - the CHGIS placename web service (filtered by the book's year span), and
 *   - the local 1820 / 1911 point files under ./1820_1911/.
 *
 * Output:
 *   - ./csv_files/<bookId>.csv      one row per coordinate found for the book
 *   - ./csv_files/list[_176].csv    one summary row per book, followed by one
 *                                   "longitude, latitude" row per coordinate
 *   - ./csv_files/error[_176].csv   books whose "years" field is not "N-N"
 *   - an HTML table echoing the same information for visual inspection
 *
 * Request parameter: ?list=176 selects the "_176" variants of the input and
 * output files.
 */
set_time_limit(0);
ini_set('memory_limit', '-1');

$fileAppend = "";
if (isset($_GET['list']) && $_GET['list'] == 176) {
    $fileAppend = "_176";
}

$allCoordinateFromFileArray = loadCoordinateFromFile();

$bookListArray = getBookList();

// Header labels for the HTML table: the 11 book columns followed by the 12
// coordinate columns. The coordinate part follows the order in which the data
// cells are emitted ($csvColumnName without 'Address'), so every label sits
// above the column it describes.
$columnName = ['level1', 'level1Code', 'level2', 'level2Code', 'years', 'date', 'bookId', 'title', 'placeName', 'xyCount', 'adminType',
    'name', 'beginYear', 'endYear', 'x', 'y', 'dataSource', 'sysId', 'presLoc', 'adminType', 'parent', 'parentSysId', 'description'];
$csvColumnName = ['Address', 'Name', 'TimeSpan:begin', 'TimeSpan:end', 'Longitude', 'Latitude', 'DATA_SOURCE', 'SYS_ID', 'PRES_LOC', 'ADMIN_TYPE', 'PARENT', 'PARENT_SYS_ID', 'Description'];

// CSV column => key in a source record. null means "this source does not
// provide the column"; it is written as an empty string. 'Description' is
// derived from the other columns by buildCoordinateRow().
//
// Columns per source:
//   book:  level1, level1Code, level2, level2Code, years, date, bookId, title, placeName, xyCount, adminType
//   chgis: beginYear, endYear, sysId, name, transcription, parentSysId, parent, featureType, x, y, dataSource
//   file:  sysId, name, x, y, presLoc, type, beginYear, endYear, dataSource
$chgisColumnMapping = ['Address' => 'name', 'Name' => 'name', 'TimeSpan:begin' => 'beginYear', 'TimeSpan:end' => 'endYear',
    'Longitude' => 'x', 'Latitude' => 'y', 'DATA_SOURCE' => 'dataSource', 'SYS_ID' => 'sysId', 'PRES_LOC' => null,
    'ADMIN_TYPE' => 'featureType', 'PARENT' => 'parent', 'PARENT_SYS_ID' => 'parentSysId'];
$fileColumnMapping = ['Address' => 'name', 'Name' => 'name', 'TimeSpan:begin' => 'beginYear', 'TimeSpan:end' => 'endYear',
    'Longitude' => 'x', 'Latitude' => 'y', 'DATA_SOURCE' => 'dataSource', 'SYS_ID' => 'sysId', 'PRES_LOC' => 'presLoc',
    'ADMIN_TYPE' => 'type', 'PARENT' => null, 'PARENT_SYS_ID' => null];

echo "<table>";
echo "<tr class='row'>";
foreach ($columnName as $name) {
    echo "<td class='" . $name . "'>" . $name . "</td>";
}
echo "</tr>";

$bookListFp = openFileOrFail('./csv_files/list' . $fileAppend . '.csv', 'w');
$errorListFp = openFileOrFail('./csv_files/error' . $fileAppend . '.csv', 'w');

foreach ($bookListArray as $book) {
    // The year span is needed to filter CHGIS results. Books without a
    // parsable span are recorded in the error file and are still looked up in
    // the local point files, but the CHGIS query is skipped for them.
    $match = [];
    $hasYears = preg_match('/([0-9]+)-([0-9]+)/', (string)$book['years'], $match) === 1;
    if (!$hasYears) {
        fputcsv($errorListFp, $book);
    }

    // A book may list several comma-separated place names; empty entries are dropped.
    $placeNameArray = preg_split('/,/', (string)$book['placeName'], -1, PREG_SPLIT_NO_EMPTY);
    if (count($placeNameArray) == 0) {
        // nothing to look up for this book
        continue;
    }

    $book['bookId'] = sprintf("%05d", $book['bookId']);
    $bookId = $book['bookId'];
    $fp = openFileOrFail('./csv_files/' . $bookId . '.csv', 'w');
    fputcsv($fp, $csvColumnName);

    $allCoordinateArray = [];
    foreach ($placeNameArray as $placeName) {
        $coordinateFromChgisArray = [];
        if ($hasYears) {
            $beginYear = intval($match[1]);
            $endYear = intval($match[2]);
            // first, search with the place name as given (admin type included)
            $coordinateFromChgisArray = getCoordinateFromChgis($placeName, $beginYear, $endYear);
            // if no result is returned, remove the admin type and search again;
            // skip the retry when stripping changes nothing or leaves nothing
            if (count($coordinateFromChgisArray) == 0) {
                $placeNameWithoutAdminType = str_replace((string)$book['adminType'], "", $placeName);
                if ($placeNameWithoutAdminType !== '' && $placeNameWithoutAdminType !== $placeName) {
                    $coordinateFromChgisArray = getCoordinateFromChgis($placeNameWithoutAdminType, $beginYear, $endYear);
                }
            }
        }
        // get coordinates from the 1820 & 1911 point files
        $coordinateFromFileArray = getCoordinateFromFile($placeName);

        // merge both sources into rows laid out like $csvColumnName
        $coordinateArray = [];
        foreach ($coordinateFromChgisArray as $coor) {
            $coordinateArray[] = buildCoordinateRow($coor, $chgisColumnMapping);
        }
        foreach ($coordinateFromFileArray as $coor) {
            $coordinateArray[] = buildCoordinateRow($coor, $fileColumnMapping);
        }

        foreach ($coordinateArray as $coor) {
            fputcsv($fp, $coor);
            $allCoordinateArray[] = $coor;
        }
    }

    // write the book summary to the list file
    $coordinateCount = count($allCoordinateArray);
    fputcsv($bookListFp, [$book['level1'], $book['date'], $book['title'], $book['placeName'], $book['bookId'], $coordinateCount]);

    // books with more than one candidate coordinate are highlighted
    $style = "";
    if ($coordinateCount > 1) {
        $style = "style='background:#ceecf5'";
    }
    foreach ($allCoordinateArray as $i => $coordinate) {
        fputcsv($bookListFp, [$coordinate['Longitude'], $coordinate['Latitude']]);
        // in the HTML table (only) the coordinates are prefixed with '#'
        $coordinate['Longitude'] = "#" . $coordinate['Longitude'];
        $coordinate['Latitude'] = "#" . $coordinate['Latitude'];
        // 'Address' duplicates 'Name' and has no column in the table
        unset($coordinate['Address']);

        $class = ($i == $coordinateCount - 1) ? ' lastRow' : '';
        echo "<tr class='" . $class . "' " . $style . ">";
        echoTableCells($book);
        echoTableCells($coordinate);
        echo "</tr>";
    }
    if ($coordinateCount == 0) {
        // no coordinate found at all: show the book on its own, highlighted
        echo "<tr class='lastRow' style='background:#F6CECE'>";
        echoTableCells($book);
        echo "</tr>";
    }
    // thin separator line with one (empty) cell per table column
    echo "<tr class='separator'>" . str_repeat('<td>', count($columnName)) . "</tr>";

    fclose($fp);
}
fclose($bookListFp);
fclose($errorListFp);
echo "</table>";


/**
 * Opens a file and aborts with an exception when that is not possible.
 *
 * @param string $path file to open
 * @param string $mode fopen() mode
 * @return resource the opened stream
 * @throws RuntimeException when fopen() fails
 */
function openFileOrFail($path, $mode)
{
    $handle = fopen($path, $mode);
    if ($handle === false) {
        throw new RuntimeException("Cannot open '" . $path . "' with mode '" . $mode . "'");
    }
    return $handle;
}

/**
 * Escapes a value for use inside HTML text or a quoted attribute.
 *
 * @param mixed $value value to print; cast to string first
 * @return string
 */
function escapeHtml($value)
{
    return htmlspecialchars((string)$value, ENT_QUOTES, 'UTF-8');
}

/**
 * Echoes one <td> per array entry; the key is added as a CSS class name.
 *
 * @param array $cells key => cell content
 * @return void
 */
function echoTableCells(array $cells)
{
    foreach ($cells as $idx => $property) {
        echo "<td class='column " . escapeHtml($idx) . "'>" . escapeHtml($property) . "</td>";
    }
}

/**
 * Converts one source record into a row keyed by the CSV column names.
 *
 * @param array $coor          record returned by getCoordinateFromChgis() or getCoordinateFromFile()
 * @param array $columnMapping CSV column name => key in $coor, or null when the source lacks the column
 * @return array row in the order of $columnMapping plus a trailing 'Description' entry
 *               ("name / x, y / begin-end / presLoc / parent / adminType / dataSource")
 */
function buildCoordinateRow(array $coor, array $columnMapping)
{
    $row = [];
    foreach ($columnMapping as $csvName => $sourceKey) {
        $row[$csvName] = ($sourceKey !== null && isset($coor[$sourceKey])) ? (string)$coor[$sourceKey] : '';
    }
    $row['Description'] = $row['Name'] . ' / ' . $row['Longitude'] . ', ' . $row['Latitude'] . ' / '
        . $row['TimeSpan:begin'] . '-' . $row['TimeSpan:end'] . ' / '
        . $row['PRES_LOC'] . ' / ' . $row['PARENT'] . ' / ' . $row['ADMIN_TYPE'] . ' / ' . $row['DATA_SOURCE'];
    return $row;
}

/**
 * Reads the tab-separated book list "local_monographs_list<suffix>.txt".
 *
 * The first line is treated as a header and discarded; blank lines are
 * skipped. Column 7 of the input is not used.
 *
 * @return array list of books, each with the keys level1, level1Code, level2,
 *               level2Code, years, date, bookId, title, placeName, xyCount and
 *               adminType (null when the input row is too short)
 * @throws RuntimeException when the list file cannot be opened
 */
function getBookList()
{
    global $fileAppend;
    $fieldIndex = ['level1' => 0, 'level1Code' => 1, 'level2' => 2, 'level2Code' => 3, 'years' => 4, 'date' => 5,
        'bookId' => 6, 'title' => 8, 'placeName' => 9, 'xyCount' => 10, 'adminType' => 11];

    $fp = openFileOrFail("local_monographs_list" . $fileAppend . ".txt", "r");
    fgetcsv($fp, 0, "\t"); // header line
    $listArray = [];
    // fgetcsv() returns false at end of file and [null] for a blank line;
    // neither is a book record.
    while (($data = fgetcsv($fp, 0, "\t")) !== false) {
        if ($data === [null]) {
            continue;
        }
        $book = [];
        foreach ($fieldIndex as $field => $idx) {
            $book[$field] = isset($data[$idx]) ? $data[$idx] : null;
        }
        $listArray[] = $book;
    }
    fclose($fp);
    return $listArray;
}

/**
 * Queries the CHGIS placename service and returns the entries that have a
 * non-zero coordinate and whose validity period overlaps the given year span.
 *
 * @param string $placeName name to search for (sent URL-encoded)
 * @param int    $beginYear first year of the span of interest
 * @param int    $endYear   last year of the span of interest
 * @return array list of records with the keys beginYear, endYear, sysId, name,
 *               transcription, parentSysId, parent, featureType, x, y and
 *               dataSource; empty when the service cannot be read or nothing matches
 */
function getCoordinateFromChgis($placeName, $beginYear, $endYear)
{
    $url = "http://chgis.hmdc.harvard.edu/placename?n=" . rawurlencode($placeName);
    $xml = simplexml_load_file($url);
    if ($xml === false) {
        // unreachable service or malformed response: treat as "no result"
        return [];
    }

    $beginYear = intval($beginYear);
    $endYear = intval($endYear);
    $array = [];
    foreach ($xml->placenames->placename as $property) {
        $begin = intval($property->years->begin);
        $end = intval($property->years->end);
        $x = (string)$property->{'xy-coordinates'}->x;
        $y = (string)$property->{'xy-coordinates'}->y;

        // Two closed intervals overlap iff each one starts before the other
        // ends. This also covers a place whose validity lies completely inside
        // the requested span, which a test on the span's endpoints alone misses.
        $overlaps = ($beginYear <= $end && $endYear >= $begin);
        $hasCoordinate = (intval($x) != 0 && intval($y) != 0);
        if (!$overlaps || !$hasCoordinate) {
            continue;
        }

        // Values are cast to plain strings/ints so that callers do not have to
        // deal with SimpleXMLElement objects.
        $array[] = [
            'beginYear' => $begin,
            'endYear' => $end,
            'sysId' => (string)$property['sys_id'],
            'name' => (string)$property->name,
            'transcription' => (string)$property->transcription,
            'parentSysId' => isset($property->parent['sys_id']) ? (string)$property->parent['sys_id'] : '',
            'parent' => (string)$property->parent,
            'featureType' => (string)$property->{'feature-type'},
            'x' => $x,
            'y' => $y,
            'dataSource' => (string)$property->{'data-source'},
        ];
    }
    return $array;
}

/**
 * Loads the tab-separated point files ./1820_1911/<year>_<admin>_pts.txt for
 * the years 1820 and 1911 and the admin levels cnty, pref and prov.
 *
 * The first line of each file is treated as a header and discarded; blank
 * lines are skipped.
 *
 * @return array $result[year][admin][column][rowNumber] with the columns sysId,
 *               name, x, y, presLoc, type, beginYear, endYear and dataSource
 *               (dataSource is the year of the file)
 * @throws RuntimeException when a point file cannot be opened
 */
function loadCoordinateFromFile()
{
    $listArray = [];
    $yearArray = [1820, 1911];
    $adminArray = ['cnty', 'pref', 'prov'];
    // input column index => field name; the two editions use different layouts
    $columnArray = [];
    $columnArray[1820] = [1 => 'sysId', 4 => 'name', 5 => 'x', 6 => 'y',
        7 => 'presLoc', 9 => 'type', 11 => 'beginYear', 13 => 'endYear'];
    $columnArray[1911] = [1 => 'sysId', 5 => 'name', 6 => 'x', 7 => 'y',
        8 => 'presLoc', 10 => 'type', 12 => 'beginYear', 14 => 'endYear'];

    foreach ($yearArray as $year) {
        foreach ($adminArray as $admin) {
            $fp = openFileOrFail("./1820_1911/" . $year . "_" . $admin . "_pts.txt", "r");
            fgetcsv($fp, 0, "\t"); // header line
            $count = 0;
            // fgetcsv() returns false at end of file and [null] for a blank line
            while (($data = fgetcsv($fp, 0, "\t")) !== false) {
                if ($data === [null]) {
                    continue;
                }
                foreach ($columnArray[$year] as $idx => $column) {
                    $listArray[$year][$admin][$column][$count] = isset($data[$idx]) ? $data[$idx] : null;
                }
                $listArray[$year][$admin]['dataSource'][$count] = $year;
                $count++;
            }
            fclose($fp);
        }
    }
    return $listArray;
}

/**
 * Looks a place name up in the data loaded by loadCoordinateFromFile()
 * (read from the global $allCoordinateFromFileArray).
 *
 * For each year (1820, 1911) the admin levels are searched in the order
 * prov, pref, cnty; only the first level that contains the name contributes
 * records for that year.
 *
 * @param string $place place name; must equal the file's name column exactly
 * @return array list of records with the keys sysId, name, x, y, presLoc,
 *               type, beginYear, endYear and dataSource
 */
function getCoordinateFromFile($place)
{
    global $allCoordinateFromFileArray;
    $coorArray = $allCoordinateFromFileArray;
    $yearArray = [1820, 1911];
    $adminArray = ['prov', 'pref', 'cnty'];
    $listArray = [];
    $place = (string)$place;

    foreach ($yearArray as $year) {
        foreach ($adminArray as $admin) {
            if (!isset($coorArray[$year][$admin]['name'])) {
                // file for this year/level was empty
                continue;
            }
            // row numbers of all exact matches (strict string comparison)
            $resultArray = array_keys($coorArray[$year][$admin]['name'], $place, true);
            if (count($resultArray) == 0) {
                continue;
            }
            foreach ($resultArray as $result) {
                $record = [];
                foreach ($coorArray[$year][$admin] as $column => $values) {
                    $record[$column] = $values[$result];
                }
                $listArray[] = $record;
            }
            // first matching admin level wins for this year
            break;
        }
    }
    return $listArray;
}
?>
  </body>
</html>