Mercurial > hg > LGSearch
comparison search_function.php @ 0:c9363a90b8b5
first commit to development server
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Tue, 24 Mar 2015 15:12:34 +0100 |
| parents | |
| children | 38851c894301 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c9363a90b8b5 |
|---|---|
<?php

include_once('config/Lib_mb_utf8.php');
include_once('config/config.php');

// Remove the execution-time and memory limits for this script.
set_time_limit(0);
ini_set('memory_limit', '-1');

// Open the default connection that the mysql_* calls in this file use
// implicitly. The $mysql_* settings are presumably defined by config/config.php.
$link_mysql = mysql_connect($mysql_server, $mysql_user, $mysql_password);
if (!$link_mysql) {
    die('Could not connect: ' . mysql_error());
}

// Select the charset only after the connection is known to be good.
// mysql_set_charset() (rather than a raw "SET NAMES" query) also informs the
// client library, which mysql_real_escape_string() relies on.
if (!mysql_set_charset('utf8', $link_mysql)) {
    // Best effort, as before: report the problem but keep going.
    trigger_error('Could not switch the connection to utf8: ' . mysql_error($link_mysql), E_USER_WARNING);
}

$db_selected = mysql_select_db($mysql_database, $link_mysql);
if (!$db_selected) {
    die('Can\'t use database ' . $mysql_database . ' : ' . mysql_error($link_mysql));
}
/**
 * Split a comma-separated keyword string into a list of trimmed keywords.
 *
 * Empty and whitespace-only entries are dropped, because an empty keyword
 * would match every row once it is placed inside a LIKE '%...%' clause.
 *
 * @param string $keywords Comma-separated keywords, e.g. "a, b ,c".
 * @return array List of non-empty trimmed keywords; an empty array when the
 *               input contains none.
 */
function trimKeyword($keywords){
    $keywordArray=array();
    foreach(explode(",",(string)$keywords) as $token){
        $token=trim($token);
        // Strict comparison on purpose: the keyword "0" is a valid keyword.
        if($token!==''){
            $keywordArray[]=$token;
        }
    }
    return $keywordArray;
}
| 29 | |
/**
 * Find every content line that contains at least one of the keywords and
 * attach the section(s) whose page range covers that line.
 *
 * Prints "result length: N<br>" (and the MySQL error text if the main query
 * fails) as a side effect, and uses the default mysql_* connection.
 *
 * @param array $keywordArray List of keyword strings; empty strings are ignored.
 * @return array List of rows. Each row holds the selected book/content columns
 *               (LEVEL1, LEVEL2, Name, PERIOD, TimeSpan:begin, TimeSpan:end,
 *               BOOK_ID, PAGE, CONTENT, VOLUME, AUTHOR, EDITION) plus
 *               'SECTION', a list of arrays with id, name, start_page and
 *               end_page. Empty array when nothing matches or the query fails.
 */
function search($keywordArray){
    $resultArray=array();

    // One LIKE clause per keyword. The keyword is escaped so that quotes in
    // user input cannot break out of the SQL string literal.
    // NOTE(review): '%' and '_' inside a keyword still act as LIKE wildcards.
    $clauses=array();
    foreach((array)$keywordArray as $keyword){
        $keyword=(string)$keyword;
        if($keyword===''){
            continue; // LIKE '%%' would match every row
        }
        $clauses[]=" contents.content LIKE '%".mysql_real_escape_string($keyword)."%' ";
    }
    if(count($clauses)===0){
        // Without a condition the WHERE clause would be a syntax error.
        echo "result length: 0<br>";
        return $resultArray;
    }
    $condition=implode(" OR ",$clauses);

    // TODO: book_info is now merged to books table
    $query="SELECT books.level1 AS LEVEL1, books.level2 AS LEVEL2,
        books.name AS Name, books.period AS PERIOD,
        books.start_year AS 'TimeSpan:begin', books.end_year AS 'TimeSpan:end',
        books.id AS BOOK_ID, contents.line AS PAGE, contents.content AS CONTENT,
        books.volume AS VOLUME, books.author AS AUTHOR, books.edition AS EDITION
        FROM contents
        JOIN books ON contents.books_id = books.id
        WHERE ".$condition."
        ORDER BY contents.books_id, contents.line";
    $result=mysql_query($query);
    if(!$result){
        // Report and stop; the fetch functions cannot work on a failed result.
        echo mysql_error();
        return $resultArray;
    }
    echo "result length: ".mysql_num_rows($result)."<br>";

    // The newest sections_versions id depends only on the book, so look it up
    // once per book instead of once per matching line.
    $maxVersionByBook=array();

    while($row=mysql_fetch_assoc($result)){ //Find the section(s) where the page belongs to
        $bookId=$row['BOOK_ID'];
        if(!array_key_exists($bookId,$maxVersionByBook)){
            $versionResult=mysql_query("SELECT MAX(`id`) FROM `sections_versions` WHERE `books_id` ='".$bookId."'");
            $versionRow=$versionResult ? mysql_fetch_row($versionResult) : false;
            $maxVersionByBook[$bookId]=$versionRow ? $versionRow[0] : null;
        }
        $max_version_id=$maxVersionByBook[$bookId];

        if($max_version_id){
            // The book has revised sections: use the latest, non-deleted revision.
            $subQuery="SELECT id, name, start_page, end_page
                FROM sections_revisions
                WHERE books_id=".$bookId." AND start_page<=".$row['PAGE']." AND end_page>=".$row['PAGE']." AND sections_revisions.deleted=0 AND sections_revisions.versions_id=".$max_version_id;
        }else{
            // No revision recorded: fall back to the original sections table.
            $subQuery="SELECT id, name, start_page, end_page
                FROM sections
                WHERE books_id=".$bookId." AND start_page<=".$row['PAGE']." AND end_page>=".$row['PAGE'];
        }

        $sectionArray=array();
        $subResult=mysql_query($subQuery);
        if($subResult){
            while($subRow=mysql_fetch_assoc($subResult)){
                $sectionArray[]=$subRow;
            }
        }
        $row['SECTION']=$sectionArray;

        $resultArray[]=$row;
    }
    return $resultArray;
}
// Values for the $tag argument of printTable(), which reads them via `global`.
$NO_TAG=0; // print each row's CONTENT unchanged
$LOCUST_TEMPLE=1; // pass CONTENT through findLocustTempleDescription() first
| 89 | |
/**
 * Render the search results as an HTML table, both to the current page (echo)
 * and to ./search_results/<filename>.html.
 *
 * The saved file additionally gets the HTML header and closing tags; the page
 * additionally gets a link to the saved file. If the file cannot be opened the
 * table is still echoed and only the file copy is skipped.
 *
 * @param array  $array        Rows with the keys BOOK_ID, Name, LEVEL1, LEVEL2,
 *                             PERIOD, PAGE, CONTENT and SECTION (a list of
 *                             arrays with name, start_page, end_page).
 * @param array  $keywordArray List of keyword strings that produced $array.
 * @param string $filename     Base name (no extension) of the result files.
 * @param int    $tag          $NO_TAG to print CONTENT unchanged, or
 *                             $LOCUST_TEMPLE to mark it up with
 *                             findLocustTempleDescription().
 * @return void
 */
function printTable($array,$keywordArray,$filename,$tag){ //print HTML
    global $NO_TAG, $LOCUST_TEMPLE;
    global $system_root_url, $server_host, $lgserver_url;

    $header='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<link href="./search.css" type="text/css" rel="stylesheet"/>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js" type="text/javascript"></script>
<script src="./search.js" charset="utf-8"></script>
</head>
<body>';

    $keywordStr=implode(", ",$keywordArray);
    // The keywords are user input: escape them for the HTML text context.
    $keywordHtml=htmlspecialchars($keywordStr,ENT_QUOTES,'UTF-8');

    $fp=fopen("./search_results/".$filename.".html","w");
    // Write to the saved copy only when it could be opened.
    $toFile=function($text) use ($fp){
        if($fp){
            fwrite($fp,$text);
        }
    };
    // Most fragments go, unchanged, to the saved copy and to the page.
    $toBoth=function($text) use ($toFile){
        $toFile($text);
        echo $text;
    };

    $toFile($header);

    // URL-encode the query values so keywords cannot break the link or the attribute.
    $toBoth("<a href='".$server_host."LGMap/get_coordinates_for_listed_books.php?file=".rawurlencode($filename.".csv")."&name=".rawurlencode($keywordStr)."' target='_blank'>view the distribution on the map</a><br><br>");
    // The link to the saved copy is only useful on the live page.
    echo "<a href='".$system_root_url."search_results/".rawurlencode($filename).".html' target='_blank'>html version</a><br><br>";

    echo sizeof($array)." result(s) of \"".$keywordHtml."\" ";
    $toFile(sizeof($array)." result(s) of \"".$keywordHtml."\"<br>");

    $toBoth("<table><tr><td class='sequence'>#<td class='bookId'>book id<td class='bookName'>book name<td class='level1'>level1<td class='level2'>level2<td class='period'>period<td class='sectionName'>section info<td class='page'>page<td class='content'>content");

    // NOTE(review): values read from the database are printed without HTML
    // escaping, as before — confirm whether they can contain markup.
    $i=1;
    foreach($array as $row){
        $toBoth("<tr><td>".$i."<td>".$row['BOOK_ID']."<td>".$row['Name']."<td>".$row['LEVEL1']."<td>".$row['LEVEL2']."<td>".$row['PERIOD']."<td>");

        foreach($row['SECTION'] as $section){
            // TODO: config to current extraction-interface OR Toc OR not at all??
            // redirect to LGServer
            $toBoth("<div class='section'><a href='".$lgserver_url."' target='_blank'>".$section['name']."</a>p".$section['start_page']."-".$section['end_page']."</div>");
        }

        $cells="<td>".$row['PAGE'];
        if($tag==$NO_TAG){
            $cells.="<td>".$row['CONTENT'];
        }else if($tag==$LOCUST_TEMPLE){
            $cells.="<td>".findLocustTempleDescription($row['CONTENT'],$keywordArray,0);
        }
        $toBoth($cells);
        $i++;
    }

    echo "</table>";
    $toFile("</table></body></html>");
    if($fp){
        fclose($fp);
    }
}
| 167 | |
/**
 * Mark up a text: keywords are wrapped in <keyword>, text judged unrelated is
 * wrapped in <irrelevant>, and the text between a keyword and the next
 * temple-like name is left bare.
 *
 * Mode 0 ($findUnknownTemple == 0) splits $str at the LAST keyword occurrence:
 * the part before it is processed again in mode 0, the part after it in mode 1.
 * A text without any keyword is wrapped in <irrelevant> as a whole.
 * Mode 1 looks for the last whitespace-preceded name of up to six characters
 * ending in 廟, 寺 or 祠; that name and everything after it is wrapped in
 * <irrelevant>, and the part before it is processed again in mode 1.
 *
 * @param string $str               Text to mark up (UTF-8).
 * @param array  $keywordArray      Keywords, matched literally; empty ones are ignored.
 * @param int    $findUnknownTemple 0 for keyword mode, anything else for temple mode.
 * @return string The marked-up text.
 */
function findLocustTempleDescription($str,$keywordArray,$findUnknownTemple){
    if($findUnknownTemple==0){
        // Quote each keyword so regex metacharacters in it are matched literally.
        // Empty keywords are skipped: an empty alternative matches without
        // consuming anything, so the recursion below would never shrink $str.
        $alternatives=array();
        foreach($keywordArray as $keyword){
            $keyword=(string)$keyword;
            if($keyword!==''){
                $alternatives[]=preg_quote($keyword,'/');
            }
        }
        if(count($alternatives)===0){
            return "<irrelevant>".$str."</irrelevant>";
        }

        // 0=whole,1=text,2=locust temple,3=description
        // The greedy (.*) makes this find the last keyword. The "s" modifier
        // lets "." cross line breaks, so the three groups always cover the
        // whole of $str and no line is dropped.
        $pattern="/(.*)(".implode("|",$alternatives).")(.*)/us";
        if(preg_match($pattern,$str,$match)===1){//find string "locust temple"
            $result=findLocustTempleDescription($match[1],$keywordArray,0);
            $result.="<keyword>".$match[2]."</keyword>";
            $result.=findLocustTempleDescription($match[3],$keywordArray,1);
            return $result;
        }
        //no string "locust temple" (or the pattern could not be applied)
        return "<irrelevant>".$str."</irrelevant>";
    }

    // 0=whole,1=description,2=unknown temple,3=text
    $pattern="/(.*\s+)(\S{1,5}[廟寺祠])(.*)/us";
    if(preg_match($pattern,$str,$match)===1){//find the pattern "locust temple......unknown temple"
        $result=findLocustTempleDescription($match[1],$keywordArray,1);
        $result.="<irrelevant>".$match[2].$match[3]."</irrelevant>";
        return $result;
    }
    //can't find the pattern
    return $str;
}
| 203 | |
/**
 * Write the search results to ./csv_files/<fileName>.csv.
 *
 * The first line holds the column names. In each data line 'Name' is prefixed
 * with the period in parentheses, 'SECTION' lists the covering sections as
 * "name start-end" joined by " ╱ ", and 'Description' combines volume, author
 * and edition. Nothing is written when the file cannot be opened.
 *
 * @param array  $array    Rows with the keys listed in $columnNameArray (except
 *                         Description) plus VOLUME, AUTHOR and EDITION; SECTION
 *                         is a list of arrays with name, start_page, end_page.
 * @param string $fileName Base name (no extension) of the CSV file.
 * @return void
 */
function writeCsvFile($array,$fileName){
    $columnNameArray=['BOOK_ID','LEVEL1','LEVEL2',
        'Name','PERIOD','TimeSpan:begin','TimeSpan:end','PAGE','SECTION','CONTENT',
        'Description'];
    $fp=fopen("./csv_files/".$fileName.".csv","w");
    if($fp===false){
        // fopen() has already raised a warning; there is nothing to write to.
        return;
    }
    fputcsv($fp,$columnNameArray);
    foreach($array as $row){
        // Fill the columns in header order first, so the values overwritten
        // below keep their position in the CSV line.
        $row['Description']='';
        $book=array();
        foreach($columnNameArray as $column){
            $book[$column]=isset($row[$column]) ? $row[$column] : '';
        }
        $book['Name']="(".$row['PERIOD'].") ".$row['Name'];

        // NOTE(review): the first replacement is a no-op as written; it may
        // originally have targeted full-width parentheses — confirm.
        $row['AUTHOR']=str_replace("(","(",$row['AUTHOR']);
        $row['AUTHOR']=str_replace(")",") ",$row['AUTHOR']);
        $book['Description']=$row['VOLUME']." ╱ ".$row['AUTHOR']." ╱ ".$row['EDITION'];

        // Join with the separator BETWEEN sections only (no trailing " ╱ ").
        $sectionLabels=array();
        foreach($row['SECTION'] as $section){
            $sectionLabels[]=$section['name']." ".$section['start_page']."-".$section['end_page'];
        }
        $book['SECTION']=implode(" ╱ ",$sectionLabels);

        fputcsv($fp,$book);
    }
    fclose($fp);
}
| 231 | |
| 232 ?> | |
| 233 |
