Mercurial > hg > extraction-interface
view map/coordinates/get_coordinates_from_chgis.php @ 27:4a29bccb6c59
Modify the SmartRegexSave method to prevent duplicated records in the topic_regex_relation table and to provide better prompting to the user about whether or not to force saving the regex file
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 03 Mar 2015 11:47:41 +0100 |
parents | b12c99b7c3f0 |
children |
line wrap: on
line source
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<style type="text/css">
body{width:2000px;font-size:14px;}
table{border-collapse:collapse;}
.row{ }
.lastRow{ /*border-bottom:1px solid #aaa;*/ }
.separator{ height:1px; background:#aaa; }
.coordinateUnfound{ background:#F6CECE; }
.coordinateOverlapped{ background:#CEECF5; }
.column{ min-height:20px; vertical-align:top; border-right:1px solid #aaa; padding:0 5px; }
.level1{width:30px;}
.level2{width:60px;}
.level1Code{width:46px;}
.level2Code{width:4px;}
.years{width:68px;}
.date{ width:30px;}
.title{ width:100px;}
.placeName,.name{width:80px;}
.bookId{width:80px;}
.xyCount{width:20px;}
.adminType{ width:20px;}
.mapUsed{width:30px;}
.chgisId{width:4px;}
.beginYear,.endYear{width:30px;}
.sysId, .parentSysId{width:70px;}
.transcription{width:90px;}
.parent{width:90px;}
.featureType{width:64px;}
.x,.y{width:80px;}
.dataSource{width:50px;}
</style>
<!--<script src="js/check_sections.js" charset="utf-8"></script>!-->
</head>
<body>
<?php
/*
 * For every book in the tab-separated book list this script looks up
 * coordinates for each of the book's place names, first through the CHGIS
 * web service and then in the local 1820/1911 point files, and
 *   - writes one CSV per book to ./csv_files/<bookId>.csv,
 *   - writes a summary to ./csv_files/list<suffix>.csv,
 *   - logs books whose "years" field cannot be parsed to
 *     ./csv_files/error<suffix>.csv,
 *   - echoes an HTML table with one row per (book, coordinate) pair.
 */

// The run performs one or two HTTP requests per place name, so the default
// execution-time and memory limits are lifted.
set_time_limit(0);
ini_set('memory_limit', '-1');

// "?list=176" selects the "_176" variants of the input and output files.
$fileAppend = "";
if (isset($_GET['list']) && $_GET['list'] == 176) {
    $fileAppend = "_176";
}

// Read by getCoordinateFromFile() through the global scope.
$allCoordinateFromFileArray = loadCoordinateFromFile();
$bookListArray = getBookList();

// Header cells of the HTML table: the 11 book properties followed by the
// coordinate properties in the order in which the data cells are emitted
// (every key of a coordinate row except 'Address').
$columnName = [
    'level1', 'level1Code', 'level2', 'level2Code', 'years', 'date', 'bookId', 'title', 'placeName', 'xyCount', 'adminType',
    'name', 'beginYear', 'endYear', 'x', 'y', 'dataSource', 'sysId', 'presLoc', 'adminType', 'parent', 'parentSysId', 'description',
];
// Header row of every per-book CSV file; coordinate rows use the same key order.
$csvColumnName = [
    'Address', 'Name', 'TimeSpan:begin', 'TimeSpan:end', 'Longitude', 'Latitude', 'DATA_SOURCE',
    'SYS_ID', 'PRES_LOC', 'ADMIN_TYPE', 'PARENT', 'PARENT_SYS_ID', 'Description',
];

echo "<table>";
echo "<tr class='row'>";
foreach ($columnName as $name) {
    echo "<td class='".$name."'>".$name."</td>";
}
echo "</tr>";

$bookListFp = openForWriting('./csv_files/list'.$fileAppend.'.csv');
$errorListFp = openForWriting('./csv_files/error'.$fileAppend.'.csv');

$yearPattern = '/([0-9]+)-([0-9]+)/';

foreach ($bookListArray as $book) {
    // Books without a "<begin>-<end>" year range are logged. They are still
    // processed, but only against the local files, because the CHGIS lookup
    // needs both years.
    $hasYears = preg_match($yearPattern, (string)$book['years'], $match) === 1;
    if (!$hasYears) {
        fputcsv($errorListFp, $book);
    }

    // Comma-separated place names; empty entries are dropped.
    $placeNameArray = preg_split('/,/', (string)$book['placeName'], -1, PREG_SPLIT_NO_EMPTY);
    if (sizeof($placeNameArray) == 0) {
        continue;
    }

    $book['bookId'] = sprintf("%05d", $book['bookId']);
    $bookId = $book['bookId'];
    $fp = openForWriting('./csv_files/'.$bookId.'.csv');
    fputcsv($fp, $csvColumnName);

    $allCoordinateArray = [];
    foreach ($placeNameArray as $placeName) {
        $coordinateFromChgisArray = [];
        if ($hasYears) {
            $beginYear = $match[1];
            $endYear = $match[2];
            // First search with the place name as given.
            $coordinateFromChgisArray = getCoordinateFromChgis($placeName, $beginYear, $endYear);
            // If nothing is returned, remove the admin type and search again
            // (skipped when removing it leaves the name unchanged).
            if (sizeof($coordinateFromChgisArray) == 0) {
                $placeNameWithoutAdminType = str_replace($book['adminType'], "", $placeName);
                if ($placeNameWithoutAdminType !== $placeName) {
                    $coordinateFromChgisArray = getCoordinateFromChgis($placeNameWithoutAdminType, $beginYear, $endYear);
                }
            }
        }
        // Coordinates from the local 1820 & 1911 files.
        $coordinateFromFileArray = getCoordinateFromFile($placeName);

        // Merge both sources into rows that share the CSV column layout.
        $coordinateArray = [];
        foreach ($coordinateFromChgisArray as $coor) {
            $coordinateArray[] = buildChgisRow($coor);
        }
        foreach ($coordinateFromFileArray as $coor) {
            $coordinateArray[] = buildFileRow($coor);
        }

        foreach ($coordinateArray as $coor) {
            fputcsv($fp, $coor);
            $allCoordinateArray[] = $coor;
        }
    }

    // Summary file: one line per book followed by one line per coordinate.
    $array = [$book['level1'], $book['date'], $book['title'], $book['placeName'], $book['bookId'], sizeof($allCoordinateArray)];
    fputcsv($bookListFp, $array);

    // Books with more than one coordinate are highlighted.
    $class = "";
    $style = "";
    if (sizeof($allCoordinateArray) > 1) {
        $style = "style='background:#ceecf5'";
    }

    $lastIndex = sizeof($allCoordinateArray) - 1;
    foreach ($allCoordinateArray as $i => $coordinate) {
        fputcsv($bookListFp, [$coordinate['Longitude'], $coordinate['Latitude']]);

        // In the HTML table the coordinates are shown with a leading "#".
        $coordinate['Longitude'] = "#".$coordinate['Longitude'];
        $coordinate['Latitude'] = "#".$coordinate['Latitude'];
        if ($i == $lastIndex) {
            $class .= ' lastRow';
        }
        echo "<tr class='".$class."' ".$style.">";
        foreach ($book as $idx => $property) {
            echo "<td class='column ".$idx."'>".escapeHtml($property)."</td>";
        }
        foreach ($coordinate as $idx => $property) {
            // 'Address' duplicates 'Name' and has no column in the table.
            if ($idx == 'Address') {
                continue;
            }
            echo "<td class='column ".$idx."'>".escapeHtml($property)."</td>";
        }
        echo "</tr>";
    }

    // Books without any coordinate get a single highlighted row.
    if (sizeof($allCoordinateArray) == 0) {
        echo "<tr class='lastRow' style='background:#F6CECE'>";
        foreach ($book as $idx => $property) {
            echo "<td class='column ".$idx."'>".escapeHtml($property)."</td>";
        }
        echo "</tr>";
    }
    echo "<tr class='separator'><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td><td></tr>";
    fclose($fp);
}

fclose($bookListFp);
fclose($errorListFp);
echo "</table>";

/**
 * Opens a file for writing.
 *
 * @param string $path file to create or truncate
 * @return resource the open handle
 * @throws RuntimeException when the file cannot be opened
 */
function openForWriting($path)
{
    $handle = fopen($path, 'w');
    if ($handle === false) {
        throw new RuntimeException('Cannot open '.$path.' for writing');
    }
    return $handle;
}

/**
 * Escapes a value for output inside an HTML element.
 *
 * @param mixed $value value to print; it is cast to a string first
 * @return string the escaped text
 */
function escapeHtml($value)
{
    return htmlspecialchars((string)$value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Builds the free-text 'Description' column of a coordinate row.
 *
 * @return string "name / x, y / begin-end / presLoc / parent / adminType / dataSource"
 */
function describeCoordinate($name, $x, $y, $time, $presLoc, $parent, $adminType, $dataSource)
{
    return $name.' / '.$x.', '.$y.' / '.$time.' / '.$presLoc.' / '.$parent.' / '.$adminType.' / '.$dataSource;
}

/**
 * Converts one result of getCoordinateFromChgis() into a CSV row.
 * CHGIS results carry no present location, so PRES_LOC is left empty.
 *
 * @param array $coor one CHGIS result
 * @return array row keyed in the order of the CSV header
 */
function buildChgisRow($coor)
{
    $time = $coor['beginYear'].'-'.$coor['endYear'];
    return [
        'Address' => $coor['name'],
        'Name' => $coor['name'],
        'TimeSpan:begin' => $coor['beginYear'],
        'TimeSpan:end' => $coor['endYear'],
        'Longitude' => $coor['x'],
        'Latitude' => $coor['y'],
        'DATA_SOURCE' => $coor['dataSource'],
        'SYS_ID' => $coor['sysId'],
        'PRES_LOC' => '',
        'ADMIN_TYPE' => $coor['featureType'],
        'PARENT' => $coor['parent'],
        'PARENT_SYS_ID' => $coor['parentSysId'],
        'Description' => describeCoordinate(
            $coor['name'], $coor['x'], $coor['y'], $time, '', $coor['parent'], $coor['featureType'], $coor['dataSource']
        ),
    ];
}

/**
 * Converts one result of getCoordinateFromFile() into a CSV row.
 * The local files carry no parent information, so PARENT and PARENT_SYS_ID
 * are left empty.
 *
 * @param array $coor one local-file result
 * @return array row keyed in the order of the CSV header
 */
function buildFileRow($coor)
{
    $time = $coor['beginYear'].'-'.$coor['endYear'];
    return [
        'Address' => $coor['name'],
        'Name' => $coor['name'],
        'TimeSpan:begin' => $coor['beginYear'],
        'TimeSpan:end' => $coor['endYear'],
        'Longitude' => $coor['x'],
        'Latitude' => $coor['y'],
        'DATA_SOURCE' => $coor['dataSource'],
        'SYS_ID' => $coor['sysId'],
        'PRES_LOC' => $coor['presLoc'],
        'ADMIN_TYPE' => $coor['type'],
        'PARENT' => '',
        'PARENT_SYS_ID' => '',
        'Description' => describeCoordinate(
            $coor['name'], $coor['x'], $coor['y'], $time, $coor['presLoc'], '', $coor['type'], $coor['dataSource']
        ),
    ];
}

/**
 * Tells whether fgetcsv() returned the single-null row it produces for a
 * blank line.
 *
 * @param array $data row returned by fgetcsv()
 * @return bool
 */
function isBlankCsvRow($data)
{
    return count($data) === 1 && $data[0] === null;
}

/**
 * Reads the tab-separated book list "local_monographs_list<suffix>.txt",
 * where <suffix> is the global $fileAppend. The first line is treated as a
 * header and skipped; blank lines are ignored.
 *
 * @return array list of books, each with the keys level1, level1Code, level2,
 *               level2Code, years, date, bookId, title, placeName, xyCount and
 *               adminType; empty when the file cannot be opened
 */
function getBookList()
{
    global $fileAppend;

    $fp = fopen("local_monographs_list".$fileAppend.".txt", "r");
    if ($fp === false) {
        return [];
    }

    fgetcsv($fp, 100000, "\t"); // header line

    $listArray = [];
    // Looping on the fgetcsv() result (instead of feof()) avoids appending a
    // row of nulls after the last line.
    while (($data = fgetcsv($fp, 1000000, "\t")) !== false) {
        if (isBlankCsvRow($data)) {
            continue;
        }
        $data = array_pad($data, 12, null); // short lines yield null fields
        $listArray[] = [
            'level1' => $data[0],
            'level1Code' => $data[1],
            'level2' => $data[2],
            'level2Code' => $data[3],
            'years' => $data[4],
            'date' => $data[5],
            'bookId' => $data[6],
            // column 7 of the input is not used
            'title' => $data[8],
            'placeName' => $data[9],
            'xyCount' => $data[10],
            'adminType' => $data[11],
        ];
    }
    fclose($fp);

    return $listArray;
}

/**
 * Queries the CHGIS placename service and keeps the results that have
 * non-zero coordinates and whose year span contains $beginYear or $endYear.
 *
 * @param string     $placeName place name to search for
 * @param int|string $beginYear first year of the book's range
 * @param int|string $endYear   last year of the book's range
 * @return array list of results with the keys beginYear, endYear, sysId, name,
 *               transcription, parentSysId, parent, featureType, x, y and
 *               dataSource (all but the years as strings); empty when the
 *               request fails or nothing matches
 */
function getCoordinateFromChgis($placeName, $beginYear, $endYear)
{
    $url = "http://chgis.hmdc.harvard.edu/placename?n=".urlencode($placeName);

    $xml = simplexml_load_file($url);
    if ($xml === false || !isset($xml->placenames->placename)) {
        return [];
    }

    $array = [];
    foreach ($xml->placenames->placename as $property) {
        $begin = intval($property->years->begin);
        $end = intval($property->years->end);
        $x = (string)$property->{'xy-coordinates'}->x;
        $y = (string)$property->{'xy-coordinates'}->y;

        // NOTE(review): this accepts a place only when one END of the book's
        // range lies inside the place's span; a span that lies strictly inside
        // the book's range is rejected. Confirm whether a plain overlap test
        // ($beginYear <= $end && $endYear >= $begin) is what is wanted.
        $yearMatches = ($beginYear >= $begin && $beginYear <= $end)
            || ($endYear >= $begin && $endYear <= $end);
        if (!$yearMatches || intval($x) == 0 || intval($y) == 0) {
            continue;
        }

        $attributes = $property->attributes();
        $parentAttributes = $property->parent->attributes();

        // Values are cast to strings so that the result does not keep
        // SimpleXMLElement objects alive.
        $array[] = [
            'beginYear' => $begin,
            'endYear' => $end,
            'sysId' => isset($attributes->sys_id) ? (string)$attributes->sys_id : '',
            'name' => (string)$property->name,
            'transcription' => (string)$property->transcription,
            'parentSysId' => isset($parentAttributes->sys_id) ? (string)$parentAttributes->sys_id : '',
            'parent' => (string)$property->parent,
            'featureType' => (string)$property->{'feature-type'},
            'x' => $x,
            'y' => $y,
            'dataSource' => (string)$property->{'data-source'},
        ];
    }

    return $array;
}

/**
 * Loads the tab-separated point files "./1820_1911/<year>_<admin>_pts.txt"
 * for the years 1820 and 1911 and the admin levels cnty, pref and prov. The
 * first line of each file is treated as a header and skipped; blank lines are
 * ignored; a file that cannot be opened is left out of the result.
 *
 * @return array column-oriented data: [$year][$admin][$column][$row], with
 *               the columns sysId, name, x, y, presLoc, type, beginYear,
 *               endYear and dataSource (dataSource holds the file's year)
 */
function loadCoordinateFromFile()
{
    // Position of each column in the input, per year (the layouts differ).
    $columnArray = [
        1820 => [1 => 'sysId', 4 => 'name', 5 => 'x', 6 => 'y', 7 => 'presLoc', 9 => 'type', 11 => 'beginYear', 13 => 'endYear'],
        1911 => [1 => 'sysId', 5 => 'name', 6 => 'x', 7 => 'y', 8 => 'presLoc', 10 => 'type', 12 => 'beginYear', 14 => 'endYear'],
    ];

    $listArray = [];
    foreach ([1820, 1911] as $year) {
        foreach (['cnty', 'pref', 'prov'] as $admin) {
            $fp = fopen("./1820_1911/".$year."_".$admin."_pts.txt", "r");
            if ($fp === false) {
                continue;
            }

            fgetcsv($fp, 100000, "\t"); // header line

            $count = 0;
            while (($data = fgetcsv($fp, 1000000, "\t")) !== false) {
                if (isBlankCsvRow($data)) {
                    continue;
                }
                foreach ($columnArray[$year] as $idx => $column) {
                    $listArray[$year][$admin][$column][$count] = isset($data[$idx]) ? $data[$idx] : null;
                }
                $listArray[$year][$admin]['dataSource'][$count] = $year;
                $count++;
            }
            fclose($fp);
        }
    }

    return $listArray;
}

/**
 * Looks a place name up in the data loaded by loadCoordinateFromFile()
 * (read from the global $allCoordinateFromFileArray). For each year the admin
 * levels are tried in the order prov, pref, cnty, and only the first level
 * with an exact name match contributes results.
 *
 * @param string $place place name to search for
 * @return array list of matches, each with the keys sysId, name, x, y,
 *               presLoc, type, beginYear, endYear and dataSource
 */
function getCoordinateFromFile($place)
{
    global $allCoordinateFromFileArray;

    $place = (string)$place;
    $listArray = [];
    foreach ([1820, 1911] as $year) {
        foreach (['prov', 'pref', 'cnty'] as $admin) {
            if (!isset($allCoordinateFromFileArray[$year][$admin]['name'])) {
                continue; // file missing or empty
            }
            $columns = $allCoordinateFromFileArray[$year][$admin];

            $rows = array_keys($columns['name'], $place, true);
            if (count($rows) === 0) {
                continue;
            }

            foreach ($rows as $row) {
                $entry = [];
                foreach ($columns as $column => $values) {
                    $entry[$column] = $values[$row];
                }
                $listArray[] = $entry;
            }
            break; // the first matching admin level wins for this year
        }
    }

    return $listArray;
}
?>
</body>
</html>