view coordinates/local_monographs_coordinates.php @ 12:a599ced81534

update books_coordinates.csv for 176 missing coordinates
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Mon, 19 Oct 2015 15:08:20 +0200
parents 2b5a5cb5d6d1
children
line wrap: on
line source

<?php
include_once("../config/config.php");
include_once("../config/Lib_mb_utf8.php");
set_time_limit(0);
ini_set('memory_limit', '-1');
ini_set("auto_detect_line_endings", true);

$link_mysql = mysql_connect($mysql_server, $mysql_user, $mysql_password);
mysql_query("SET NAMES utf8");

if (!$link_mysql) {
	die('Could not connect: ' . mysql_error());
}

$db_selected = mysql_select_db($mysql_database, $link_mysql);
if (!$db_selected) {
	die ('Can\'t use foo : ' . mysql_error());
}

function getCoordinateFromFile(){
	$fp=fopen("local_monographs_coordinates_filtered.txt","r");
	$columnNameMapping=['level1'=>0,'level2'=>1,'start_year'=>2,'end_year'=>2,
		'period'=>3,'books_id'=>4,'name'=>5,'place_name'=>6,
		'admin_type'=>7,'x'=>8,'y'=>8,
		'chgis_id'=>9,'1820_id'=>10,'1911_id'=>11,'cbdb_id'=>12];
	$data=fgetcsv($fp,10000,"\t");
	$coordinateArray=array();
	$count=0;
	while(!feof($fp)){
		$data=fgetcsv($fp,10000,"\t");
		$pattern='/([0-9]+)-([0-9]+)/';
		if(!preg_match($pattern,$data[2],$match)){
			if($data[4]!="")
				echo $data[4]." contains wrong year format<br>";
			continue;
		}
		foreach($columnNameMapping as $newName=>$oldName){
			$coordinateArray[$count][$newName]=$data[$oldName];
		}
		$coordinateArray[$count]['start_year']=$match[1];
		$coordinateArray[$count]['end_year']=$match[2];
		$coordinateArray[$count]['books_id']=sprintf("%05d",$coordinateArray[$count]['books_id']);	
		$coorArray=array();
		$idNameArray=['chgis_id','1820_id','1911_id','cbdb_id'];
		$coorArray=explode(';',$coordinateArray[$count]['x']); //Check if there are multiple coordinates associated with this place/book
		$size=sizeof($coorArray);
		foreach($idNameArray as $name){
			if($coordinateArray[$count][$name]==""){
				for($i=0;$i<$size;$i++){
					$coordinateArray[$count][$name][$i]='';
				}
				continue;
			}
			$coordinateArray[$count][$name]=explode(';',$coordinateArray[$count][$name]);
			if($size!=sizeof($coordinateArray[$count][$name])){
				echo $coordinateArray[$count]['books_id']." column ".$name." contains wrong number of elements<br>"; // Check for data inconsistency
			}
		}
		$coordinateArray[$count]['x']=array();	
		$coordinateArray[$count]['y']=array();	
		foreach($coorArray as $idx=>$coor){
			$coordinateArray[$count]['x'][$idx]='';
			$coordinateArray[$count]['y'][$idx]='';
			if($coor!=''){
				list($coordinateArray[$count]['x'][$idx],$coordinateArray[$count]['y'][$idx])=explode(',',$coor);
			}
			$coordinateArray[$count]['x'][$idx]=trim($coordinateArray[$count]['x'][$idx]);
			$coordinateArray[$count]['y'][$idx]=trim($coordinateArray[$count]['y'][$idx]);
		}

		$count++;
	}
	return $coordinateArray;
}

function printCoordinate($coordinateArray){
	echo "<table>";
	foreach($coordinateArray as $coordinate){
		echo "<tr>";
		$array=['x','y','chgis_id','1820_id','1911_id','cbdb_id'];
		foreach($coordinate as $name=>$column){
			if(in_array($name,$array)){
				echo "<td>";
				foreach($column as $subColumn){
					echo $subColumn."<br>"; 
				}
				continue;
			}
			echo "<td>".$column;
		}
	}
	echo "</table>";
}
function bookIsInDatabase($info){
	$compareColumnName=['level1','level2','start_year','end_year','period','name'];
	$query="SELECT * FROM books WHERE id='".$info['books_id']."'";
	$result=mysql_query($query);
	if(mysql_num_rows($result)!=0){
		$row=mysql_fetch_assoc($result);
		if($info['level2']=="") $info['level2']="NULL";
		foreach($compareColumnName as $column){
			if($row[$column]!=$info[$column] && $column!='level2'){
				echo $info['books_id']." column ".$column." is different from that in the database<br>";
				//return false;
			}
		}
		return true;
	}
	return false;
}
function insertCoordinateIntoDatabase($coordinateArray){
	$row=0;
	$columnName=['books_id','place_name','admin_type','x','y','chgis_id','1820_id','1911_id','cbdb_id'];
	foreach($coordinateArray as $coordinate){
		if(bookIsInDatabase($coordinate)){ // Check if the book_id already in the 'books' table
			foreach($coordinate['x'] as $idx=>$c){
				$query="INSERT INTO coordinates_books (";
				$count=0;
				foreach($columnName as $column){
					$query.=$column;
					if($count!=sizeof($columnName)-1){
						$query.=",";
					}
					$count++;
				}
				$query.=") VALUES (";
				$count=0;
				foreach($columnName as $i=>$column){
					if(!is_array($coordinate[$column])){
						$query.="'".$coordinate[$column]."'";
					}else{
						$query.="'".$coordinate[$column][$idx]."'";
					}
					if($count!=sizeof($columnName)-1){
						$query.=",";
					}
					$count++;
				}
				$query.=")";
				//echo $query."<br>";
				$result=mysql_query($query);
				if(!$result){
					echo "Failed during inserting: <br>";
				   	echo $query."<br>";
				}else{
				   	//echo $coordinate['name']."++++++++++++".$coordinate['books_id']."<br>";
				   	$row++;
				}
			}
			//echo "<br>";
		}
	}	
	echo "# of rows: ".$row."<br>";
}
function getBookListFromDatabase(){
	$query="SELECT coor.place_name AS Address, books.admin_type AS ADMIN_TYPE, books.level1 AS LEVEL1, books.level2 AS LEVEL2, 
			books.name AS Name, books.period AS PERIOD, 
			books.start_year AS 'TimeSpan:begin', books.end_year AS 'TimeSpan:end',
			books.id AS BOOK_ID, coor.x AS Longitude, coor.y AS Latitude, 
			coor.chgis_id AS CHGIS_ID, coor.1820_id AS 1820_ID,
			coor.1911_id AS 1911_ID, coor.cbdb_id AS CBDB_ID,	
		books.volume AS VOLUME, books.author AS AUTHOR, books.edition AS EDITION FROM books 
		JOIN coordinates_books coor ON books.id=coor.books_id"; 
		//JOIN books_info info ON books.id=info.books_id";
	$bookArray=array();
        $result=mysql_query($query);
        while($row=mysql_fetch_assoc($result)){
                $bookArray[]=$row;
        }

        return $bookArray;
}

?>

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<style>
td{
	min-width:80px;
	max-width:160px;
	border-right:1px #aaa solid;
}
</style>
</head>
<body>
<?php
//$coordinateArray=getCoordinateFromFile(); //Read from local_monographs_coordinates_filtered.txt to get the most updated coordinates
////printCoordinate($coordinateArray);
//insertCoordinateIntoDatabase($coordinateArray); //insert them to db

$bookArray=getBookListFromDatabase();
foreach($bookArray as $idx=>$book){
        $bookArray[$idx]['Name']="(".$book['PERIOD'].") ".$book['Name'];
        $book['AUTHOR']=str_replace("(","(",$book['AUTHOR']);
        $book['AUTHOR']=str_replace(")",") ",$book['AUTHOR']);
        $bookArray[$idx]['Description']=$book['VOLUME']." ╱ ".$book['AUTHOR']." ╱ ".$book['EDITION'];
}
$columnNameArray=['Address','ADMIN_TYPE','LEVEL1','LEVEL2',
	'Name','PERIOD','TimeSpan:begin','TimeSpan:end','Longitude','Latitude',
	'BOOK_ID','CHGIS_ID','1820_ID','1911_ID','CBDB_ID','Description'];

echo sizeof($bookArray)."<br>";
echo "<table>";
echo "<tr>";
$fp=fopen("books_coordinates.csv","w");
foreach($columnNameArray as $column){
        echo "<td>".$column;
}
fputcsv($fp,$columnNameArray);
$count=0;
foreach($bookArray as $book){
        echo "<tr>";
        $row=array();
        foreach($columnNameArray as $column){
                echo "<td>".$book[$column];
                $row[]=$book[$column];
        }
        fputcsv($fp,$row);
        $count++;
}
echo "</table>";


?>

</body>
</html>