comparison coordinates/local_monographs_coordinates.php @ 0:57bde4830927

first commit
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 24 Mar 2015 11:37:17 +0100
parents
children 2b5a5cb5d6d1
comparison
equal deleted inserted replaced
-1:000000000000 0:57bde4830927
1 <?php
2 include_once("../../interface/config.php");
3 include_once('../../interface/Lib_mb_utf8.php');
4 set_time_limit(0);
5 ini_set('memory_limit', '-1');
6 ini_set("auto_detect_line_endings", true);
7
8 $link_mysql = mysql_connect($mysql_server, $mysql_user, $mysql_password);
9 mysql_query("SET NAMES utf8");
10
11 if (!$link_mysql) {
12 die('Could not connect: ' . mysql_error());
13 }
14
15 $db_selected = mysql_select_db($mysql_database, $link_mysql);
16 if (!$db_selected) {
17 die ('Can\'t use foo : ' . mysql_error());
18 }
19
20 function getCoordinateFromFile(){
21 $fp=fopen("local_monographs_coordinates_filtered.txt","r");
22 $columnNameMapping=['level1'=>0,'level2'=>1,'start_year'=>2,'end_year'=>2,
23 'period'=>3,'books_id'=>4,'name'=>5,'place_name'=>6,
24 'admin_type'=>7,'x'=>8,'y'=>8,
25 'chgis_id'=>9,'1820_id'=>10,'1911_id'=>11,'cbdb_id'=>12];
26 $data=fgetcsv($fp,10000,"\t");
27 $coordinateArray=array();
28 $count=0;
29 while(!feof($fp)){
30 $data=fgetcsv($fp,10000,"\t");
31 $pattern='/([0-9]+)-([0-9]+)/';
32 if(!preg_match($pattern,$data[2],$match)){
33 if($data[4]!="")
34 echo $data[4]." contains wrong year format<br>";
35 continue;
36 }
37 foreach($columnNameMapping as $newName=>$oldName){
38 $coordinateArray[$count][$newName]=$data[$oldName];
39 }
40 $coordinateArray[$count]['start_year']=$match[1];
41 $coordinateArray[$count]['end_year']=$match[2];
42 $coordinateArray[$count]['books_id']=sprintf("%05d",$coordinateArray[$count]['books_id']);
43 $coorArray=array();
44 $idNameArray=['chgis_id','1820_id','1911_id','cbdb_id'];
45 $coorArray=explode(';',$coordinateArray[$count]['x']); //Check if there are multiple coordinates associated with this place/book
46 $size=sizeof($coorArray);
47 foreach($idNameArray as $name){
48 if($coordinateArray[$count][$name]==""){
49 for($i=0;$i<$size;$i++){
50 $coordinateArray[$count][$name][$i]='';
51 }
52 continue;
53 }
54 $coordinateArray[$count][$name]=explode(';',$coordinateArray[$count][$name]);
55 if($size!=sizeof($coordinateArray[$count][$name])){
56 echo $coordinateArray[$count]['books_id']." column ".$name." contains wrong number of elements<br>"; // Check for data inconsistency
57 }
58 }
59 $coordinateArray[$count]['x']=array();
60 $coordinateArray[$count]['y']=array();
61 foreach($coorArray as $idx=>$coor){
62 $coordinateArray[$count]['x'][$idx]='';
63 $coordinateArray[$count]['y'][$idx]='';
64 if($coor!=''){
65 list($coordinateArray[$count]['x'][$idx],$coordinateArray[$count]['y'][$idx])=explode(',',$coor);
66 }
67 $coordinateArray[$count]['x'][$idx]=trim($coordinateArray[$count]['x'][$idx]);
68 $coordinateArray[$count]['y'][$idx]=trim($coordinateArray[$count]['y'][$idx]);
69 }
70
71 $count++;
72 }
73 return $coordinateArray;
74 }
75
76 function printCoordinate($coordinateArray){
77 echo "<table>";
78 foreach($coordinateArray as $coordinate){
79 echo "<tr>";
80 $array=['x','y','chgis_id','1820_id','1911_id','cbdb_id'];
81 foreach($coordinate as $name=>$column){
82 if(in_array($name,$array)){
83 echo "<td>";
84 foreach($column as $subColumn){
85 echo $subColumn."<br>";
86 }
87 continue;
88 }
89 echo "<td>".$column;
90 }
91 }
92 echo "</table>";
93 }
94 function bookIsInDatabase($info){
95 $compareColumnName=['level1','level2','start_year','end_year','period','name'];
96 $query="SELECT * FROM books WHERE id='".$info['books_id']."'";
97 $result=mysql_query($query);
98 if(mysql_num_rows($result)!=0){
99 $row=mysql_fetch_assoc($result);
100 if($info['level2']=="") $info['level2']="NULL";
101 foreach($compareColumnName as $column){
102 if($row[$column]!=$info[$column] && $column!='level2'){
103 echo $info['books_id']." column ".$column." is different from that in the database<br>";
104 //return false;
105 }
106 }
107 return true;
108 }
109 return false;
110 }
111 function insertCoordinateIntoDatabase($coordinateArray){
112 $row=0;
113 $columnName=['books_id','place_name','admin_type','x','y','chgis_id','1820_id','1911_id','cbdb_id'];
114 foreach($coordinateArray as $coordinate){
115 if(bookIsInDatabase($coordinate)){ // Check if the book_id already in the 'books' table
116 foreach($coordinate['x'] as $idx=>$c){
117 $query="INSERT INTO coordinates_books (";
118 $count=0;
119 foreach($columnName as $column){
120 $query.=$column;
121 if($count!=sizeof($columnName)-1){
122 $query.=",";
123 }
124 $count++;
125 }
126 $query.=") VALUES (";
127 $count=0;
128 foreach($columnName as $i=>$column){
129 if(!is_array($coordinate[$column])){
130 $query.="'".$coordinate[$column]."'";
131 }else{
132 $query.="'".$coordinate[$column][$idx]."'";
133 }
134 if($count!=sizeof($columnName)-1){
135 $query.=",";
136 }
137 $count++;
138 }
139 $query.=")";
140 //echo $query."<br>";
141 $result=mysql_query($query);
142 if(!$result){
143 echo "Failed during inserting: <br>";
144 echo $query."<br>";
145 }else{
146 //echo $coordinate['name']."++++++++++++".$coordinate['books_id']."<br>";
147 $row++;
148 }
149 }
150 //echo "<br>";
151 }
152 }
153 echo "# of rows: ".$row."<br>";
154 }
155 function getBookListFromDatabase(){
156 $query="SELECT coor.place_name AS Address, coor.admin_type AS ADMIN_TYPE, books.level1 AS LEVEL1, books.level2 AS LEVEL2,
157 books.name AS Name, books.period AS PERIOD,
158 books.start_year AS 'TimeSpan:begin', books.end_year AS 'TimeSpan:end',
159 books.id AS BOOK_ID, coor.x AS Longitude, coor.y AS Latitude,
160 coor.chgis_id AS CHGIS_ID, coor.1820_id AS 1820_ID,
161 coor.1911_id AS 1911_ID, coor.cbdb_id AS CBDB_ID,
162 info.volume AS VOLUME, info.author AS AUTHOR, info.edition AS EDITION FROM books
163 JOIN coordinates_books coor ON books.id=coor.books_id
164 JOIN books_info info ON books.id=info.books_id";
165 $bookArray=array();
166 $result=mysql_query($query);
167 while($row=mysql_fetch_assoc($result)){
168 $bookArray[]=$row;
169 }
170 return $bookArray;
171 }
172
173 ?>
174
175 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
176 <html>
177 <head>
178 <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
179 <style>
180 td{
181 min-width:80px;
182 max-width:160px;
183 border-right:1px #aaa solid;
184 }
185 </style>
186 </head>
187 <body>
188 <?php
189 $coordinateArray=getCoordinateFromFile(); //Read from local_monographs_coordinates_filtered.txt to get the most updated coordinates
190 //printCoordinate($coordinateArray);
191 insertCoordinateIntoDatabase($coordinateArray); //insert them to db
192 $bookArray=getBookListFromDatabase();
193 foreach($bookArray as $idx=>$book){
194 $bookArray[$idx]['Name']="(".$book['PERIOD'].") ".$book['Name'];
195 $book['AUTHOR']=str_replace("(","(",$book['AUTHOR']);
196 $book['AUTHOR']=str_replace(")",") ",$book['AUTHOR']);
197 $bookArray[$idx]['Description']=$book['VOLUME']." ╱ ".$book['AUTHOR']." ╱ ".$book['EDITION'];
198 }
199 $columnNameArray=['Address','ADMIN_TYPE','LEVEL1','LEVEL2',
200 'Name','PERIOD','TimeSpan:begin','TimeSpan:end','Longitude','Latitude',
201 'BOOK_ID','CHGIS_ID','1820_ID','1911_ID','CBDB_ID','Description'];
202
203 echo sizeof($bookArray)."<br>";
204 echo "<table>";
205 echo "<tr>";
206 $fp=fopen("books_coordinates.csv","w");
207 foreach($columnNameArray as $column){
208 echo "<td>".$column;
209 }
210 fputcsv($fp,$columnNameArray);
211 $count=0;
212 foreach($bookArray as $book){
213 echo "<tr>";
214 $row=array();
215 foreach($columnNameArray as $column){
216 echo "<td>".$book[$column];
217 $row[]=$book[$column];
218 }
219 fputcsv($fp,$row);
220 $count++;
221 }
222 echo "</table>";
223
224
225 ?>
226
227 </body>
228 </html>
229