diff coordinates/local_monographs_coordinates.php @ 0:57bde4830927

first commit
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 24 Mar 2015 11:37:17 +0100
parents
children 2b5a5cb5d6d1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coordinates/local_monographs_coordinates.php	Tue Mar 24 11:37:17 2015 +0100
@@ -0,0 +1,229 @@
+<?php
+include_once("../../interface/config.php");
+include_once('../../interface/Lib_mb_utf8.php');
+set_time_limit(0);
+ini_set('memory_limit', '-1');
+ini_set("auto_detect_line_endings", true);
+
+$link_mysql = mysql_connect($mysql_server, $mysql_user, $mysql_password);
+mysql_query("SET NAMES utf8");
+
+if (!$link_mysql) {
+	die('Could not connect: ' . mysql_error());
+}
+
+$db_selected = mysql_select_db($mysql_database, $link_mysql);
+if (!$db_selected) {
+	die ('Can\'t use foo : ' . mysql_error());
+}
+
+function getCoordinateFromFile(){
+	$fp=fopen("local_monographs_coordinates_filtered.txt","r");
+	$columnNameMapping=['level1'=>0,'level2'=>1,'start_year'=>2,'end_year'=>2,
+		'period'=>3,'books_id'=>4,'name'=>5,'place_name'=>6,
+		'admin_type'=>7,'x'=>8,'y'=>8,
+		'chgis_id'=>9,'1820_id'=>10,'1911_id'=>11,'cbdb_id'=>12];
+	$data=fgetcsv($fp,10000,"\t");
+	$coordinateArray=array();
+	$count=0;
+	while(!feof($fp)){
+		$data=fgetcsv($fp,10000,"\t");
+		$pattern='/([0-9]+)-([0-9]+)/';
+		if(!preg_match($pattern,$data[2],$match)){
+			if($data[4]!="")
+				echo $data[4]." contains wrong year format<br>";
+			continue;
+		}
+		foreach($columnNameMapping as $newName=>$oldName){
+			$coordinateArray[$count][$newName]=$data[$oldName];
+		}
+		$coordinateArray[$count]['start_year']=$match[1];
+		$coordinateArray[$count]['end_year']=$match[2];
+		$coordinateArray[$count]['books_id']=sprintf("%05d",$coordinateArray[$count]['books_id']);	
+		$coorArray=array();
+		$idNameArray=['chgis_id','1820_id','1911_id','cbdb_id'];
+		$coorArray=explode(';',$coordinateArray[$count]['x']); //Check if there are multiple coordinates associated with this place/book
+		$size=sizeof($coorArray);
+		foreach($idNameArray as $name){
+			if($coordinateArray[$count][$name]==""){
+				for($i=0;$i<$size;$i++){
+					$coordinateArray[$count][$name][$i]='';
+				}
+				continue;
+			}
+			$coordinateArray[$count][$name]=explode(';',$coordinateArray[$count][$name]);
+			if($size!=sizeof($coordinateArray[$count][$name])){
+				echo $coordinateArray[$count]['books_id']." column ".$name." contains wrong number of elements<br>"; // Check for data inconsistency
+			}
+		}
+		$coordinateArray[$count]['x']=array();	
+		$coordinateArray[$count]['y']=array();	
+		foreach($coorArray as $idx=>$coor){
+			$coordinateArray[$count]['x'][$idx]='';
+			$coordinateArray[$count]['y'][$idx]='';
+			if($coor!=''){
+				list($coordinateArray[$count]['x'][$idx],$coordinateArray[$count]['y'][$idx])=explode(',',$coor);
+			}
+			$coordinateArray[$count]['x'][$idx]=trim($coordinateArray[$count]['x'][$idx]);
+			$coordinateArray[$count]['y'][$idx]=trim($coordinateArray[$count]['y'][$idx]);
+		}
+
+		$count++;
+	}
+	return $coordinateArray;
+}
+
+function printCoordinate($coordinateArray){
+	echo "<table>";
+	foreach($coordinateArray as $coordinate){
+		echo "<tr>";
+		$array=['x','y','chgis_id','1820_id','1911_id','cbdb_id'];
+		foreach($coordinate as $name=>$column){
+			if(in_array($name,$array)){
+				echo "<td>";
+				foreach($column as $subColumn){
+					echo $subColumn."<br>"; 
+				}
+				continue;
+			}
+			echo "<td>".$column;
+		}
+	}
+	echo "</table>";
+}
+function bookIsInDatabase($info){
+	$compareColumnName=['level1','level2','start_year','end_year','period','name'];
+	$query="SELECT * FROM books WHERE id='".$info['books_id']."'";
+	$result=mysql_query($query);
+	if(mysql_num_rows($result)!=0){
+		$row=mysql_fetch_assoc($result);
+		if($info['level2']=="") $info['level2']="NULL";
+		foreach($compareColumnName as $column){
+			if($row[$column]!=$info[$column] && $column!='level2'){
+				echo $info['books_id']." column ".$column." is different from that in the database<br>";
+				//return false;
+			}
+		}
+		return true;
+	}
+	return false;
+}
+function insertCoordinateIntoDatabase($coordinateArray){
+	$row=0;
+	$columnName=['books_id','place_name','admin_type','x','y','chgis_id','1820_id','1911_id','cbdb_id'];
+	foreach($coordinateArray as $coordinate){
+		if(bookIsInDatabase($coordinate)){ // Check if the book_id already in the 'books' table
+			foreach($coordinate['x'] as $idx=>$c){
+				$query="INSERT INTO coordinates_books (";
+				$count=0;
+				foreach($columnName as $column){
+					$query.=$column;
+					if($count!=sizeof($columnName)-1){
+						$query.=",";
+					}
+					$count++;
+				}
+				$query.=") VALUES (";
+				$count=0;
+				foreach($columnName as $i=>$column){
+					if(!is_array($coordinate[$column])){
+						$query.="'".$coordinate[$column]."'";
+					}else{
+						$query.="'".$coordinate[$column][$idx]."'";
+					}
+					if($count!=sizeof($columnName)-1){
+						$query.=",";
+					}
+					$count++;
+				}
+				$query.=")";
+				//echo $query."<br>";
+				$result=mysql_query($query);
+				if(!$result){
+					echo "Failed during inserting: <br>";
+				   	echo $query."<br>";
+				}else{
+				   	//echo $coordinate['name']."++++++++++++".$coordinate['books_id']."<br>";
+				   	$row++;
+				}
+			}
+			//echo "<br>";
+		}
+	}	
+	echo "# of rows: ".$row."<br>";
+}
+function getBookListFromDatabase(){
+	$query="SELECT coor.place_name AS Address, coor.admin_type AS ADMIN_TYPE, books.level1 AS LEVEL1, books.level2 AS LEVEL2, 
+			books.name AS Name, books.period AS PERIOD, 
+			books.start_year AS 'TimeSpan:begin', books.end_year AS 'TimeSpan:end',
+			books.id AS BOOK_ID, coor.x AS Longitude, coor.y AS Latitude, 
+			coor.chgis_id AS CHGIS_ID, coor.1820_id AS 1820_ID,
+			coor.1911_id AS 1911_ID, coor.cbdb_id AS CBDB_ID,	
+		info.volume AS VOLUME, info.author AS AUTHOR, info.edition AS EDITION FROM books 
+		JOIN coordinates_books coor ON books.id=coor.books_id 
+		JOIN books_info info ON books.id=info.books_id";
+	$bookArray=array();
+        $result=mysql_query($query);
+        while($row=mysql_fetch_assoc($result)){
+                $bookArray[]=$row;
+        }
+        return $bookArray;
+}
+
+?>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<style>
+td{
+	min-width:80px;
+	max-width:160px;
+	border-right:1px #aaa solid;
+}
+</style>
+</head>
+<body>
+<?php
+$coordinateArray=getCoordinateFromFile(); //Read from local_monographs_coordinates_filtered.txt to get the most updated coordinates
+//printCoordinate($coordinateArray);
+insertCoordinateIntoDatabase($coordinateArray); //insert them to db
+$bookArray=getBookListFromDatabase();
+foreach($bookArray as $idx=>$book){
+        $bookArray[$idx]['Name']="(".$book['PERIOD'].") ".$book['Name'];
+        $book['AUTHOR']=str_replace("(","(",$book['AUTHOR']);
+        $book['AUTHOR']=str_replace(")",") ",$book['AUTHOR']);
+        $bookArray[$idx]['Description']=$book['VOLUME']." ╱ ".$book['AUTHOR']." ╱ ".$book['EDITION'];
+}
+$columnNameArray=['Address','ADMIN_TYPE','LEVEL1','LEVEL2',
+	'Name','PERIOD','TimeSpan:begin','TimeSpan:end','Longitude','Latitude',
+	'BOOK_ID','CHGIS_ID','1820_ID','1911_ID','CBDB_ID','Description'];
+
+echo sizeof($bookArray)."<br>";
+echo "<table>";
+echo "<tr>";
+$fp=fopen("books_coordinates.csv","w");
+foreach($columnNameArray as $column){
+        echo "<td>".$column;
+}
+fputcsv($fp,$columnNameArray);
+$count=0;
+foreach($bookArray as $book){
+        echo "<tr>";
+        $row=array();
+        foreach($columnNameArray as $column){
+                echo "<td>".$book[$column];
+                $row[]=$book[$column];
+        }
+        fputcsv($fp,$row);
+        $count++;
+}
+echo "</table>";
+
+
+?>
+
+</body>
+</html>
+