Mercurial > hg > LGSearch
view search_function.php @ 1:38851c894301
path config
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 24 Mar 2015 16:27:18 +0100 |
parents | c9363a90b8b5 |
children | 45cbafdec56e |
line wrap: on
line source
<?php
/**
 * Keyword search over the `contents` table joined with `books`, plus helpers that
 * render the hits as an HTML table (browser + ./search_results/) and export them
 * as CSV (./csv_files/).
 *
 * Loading this file opens a MySQL connection using $mysql_server, $mysql_user,
 * $mysql_password and $mysql_database (presumably defined in config/config.php --
 * TODO confirm) and aborts with die() when the connection or database selection fails.
 */
include_once('config/Lib_mb_utf8.php');
include_once('config/config.php');

// Searches can be large: lift the execution-time and memory limits.
set_time_limit(0);
ini_set('memory_limit', '-1');

$link_mysql = mysql_connect($mysql_server, $mysql_user, $mysql_password);
if (!$link_mysql) {
    die('Could not connect: ' . mysql_error());
}

// Configure the charset only once the connection is known to be usable.
// mysql_set_charset() also informs the client library, which
// mysql_real_escape_string() relies on; fall back to SET NAMES if it fails.
if (!mysql_set_charset('utf8', $link_mysql)) {
    mysql_query("SET NAMES utf8", $link_mysql);
}

$db_selected = mysql_select_db($mysql_database, $link_mysql);
if (!$db_selected) {
    die('Can\'t use foo : ' . mysql_error());
}

/**
 * Split a comma-separated keyword string into a list of trimmed keywords.
 *
 * Empty and whitespace-only entries are dropped so that they can never turn into
 * a match-everything LIKE '%%' clause or an empty regex alternative later on.
 * A keyword of "0" is kept.
 *
 * @param string $keywords Comma-separated keywords.
 * @return array List of non-empty keywords (empty array when there are none).
 */
function trimKeyword($keywords)
{
    $keywordArray = [];
    foreach (explode(",", (string) $keywords) as $token) {
        $token = trim($token);
        if ($token !== '') {
            $keywordArray[] = $token;
        }
    }
    return $keywordArray;
}

/**
 * Find every content line containing at least one keyword and attach the
 * section(s) whose page range covers that line.
 *
 * Echoes "result length: N<br>" (and the MySQL error text if the main query fails).
 *
 * @param array $keywordArray Keywords; a row matches if it contains any of them.
 * @return array List of rows keyed by LEVEL1, LEVEL2, Name, PERIOD, TimeSpan:begin,
 *               TimeSpan:end, BOOK_ID, PAGE, CONTENT, VOLUME, AUTHOR, EDITION, plus
 *               SECTION (list of id/name/start_page/end_page rows). Empty array when
 *               there are no usable keywords, no hits, or the query fails.
 */
function search($keywordArray)
{
    $likeClauses = [];
    foreach ((array) $keywordArray as $keyword) {
        $keyword = (string) $keyword;
        if ($keyword === '') {
            // An empty keyword would become LIKE '%%' and match every row.
            continue;
        }
        // Escape for the string literal. LIKE wildcards (% and _) inside a
        // keyword are still interpreted by MySQL, as before.
        $likeClauses[] = " contents.content LIKE '%" . mysql_real_escape_string($keyword) . "%' ";
    }

    if (!$likeClauses) {
        // No usable keyword: an empty WHERE clause would be invalid SQL.
        echo "result length: 0<br>";
        return [];
    }
    $condition = implode(" OR ", $likeClauses);

    // TODO: book_info is now merged to books table
    $query = "SELECT books.level1 AS LEVEL1, books.level2 AS LEVEL2, books.name AS Name, "
        . "books.period AS PERIOD, books.start_year AS 'TimeSpan:begin', books.end_year AS 'TimeSpan:end', "
        . "books.id AS BOOK_ID, contents.line AS PAGE, contents.content AS CONTENT, "
        . "books.volume AS VOLUME, books.author AS AUTHOR, books.edition AS EDITION "
        . "FROM contents JOIN books ON contents.books_id = books.id "
        . "WHERE " . $condition . " ORDER BY contents.books_id, contents.line";

    $result = mysql_query($query);
    if (!$result) {
        echo mysql_error();
        echo "result length: 0<br>";
        return [];
    }
    echo "result length: " . mysql_num_rows($result) . "<br>";

    $resultArray = [];
    // Rows arrive ordered by book, so the newest sections version is looked up
    // once per book instead of once per row.
    $latestVersionByBook = [];

    while ($row = mysql_fetch_assoc($result)) {
        $bookId = $row['BOOK_ID'];
        $page = $row['PAGE'];

        if (!array_key_exists($bookId, $latestVersionByBook)) {
            $versionResult = mysql_query(
                "SELECT MAX(`id`) FROM `sections_versions` WHERE `books_id` ='" . $bookId . "'"
            );
            $versionRow = $versionResult ? mysql_fetch_row($versionResult) : false;
            $latestVersionByBook[$bookId] = $versionRow ? $versionRow[0] : null;
            if ($versionResult) {
                mysql_free_result($versionResult);
            }
        }
        $max_version_id = $latestVersionByBook[$bookId];

        // Find the section(s) the page belongs to: use the newest revision set
        // when the book has one, otherwise the plain sections table.
        if ($max_version_id) {
            $subQuery = "SELECT id, name, start_page, end_page FROM sections_revisions"
                . " WHERE books_id=" . $bookId
                . " AND start_page<=" . $page
                . " AND end_page>=" . $page
                . " AND sections_revisions.deleted=0"
                . " AND sections_revisions.versions_id=" . $max_version_id;
        } else {
            $subQuery = "SELECT id, name, start_page, end_page FROM sections"
                . " WHERE books_id=" . $bookId
                . " AND start_page<=" . $page
                . " AND end_page>=" . $page;
        }

        $sectionArray = [];
        $subResult = mysql_query($subQuery);
        if ($subResult) {
            while ($subRow = mysql_fetch_assoc($subResult)) {
                $sectionArray[] = $subRow;
            }
            // Release each per-row result now instead of holding all of them
            // until the end of the request.
            mysql_free_result($subResult);
        }

        $row['SECTION'] = $sectionArray;
        $resultArray[] = $row;
    }
    mysql_free_result($result);

    return $resultArray;
}

// Values accepted by printTable()'s $tag parameter.
$NO_TAG = 0;
$LOCUST_TEMPLE = 1;

/**
 * Render search hits as an HTML table, both to the browser (echo) and to
 * ./search_results/<filename>.html.
 *
 * The saved file additionally gets the document header and closing body/html tags;
 * the browser output additionally gets a link to the saved file.
 *
 * @param array  $array        Rows as returned by search().
 * @param array  $keywordArray Keywords that produced the rows.
 * @param string $filename     Base name (no extension) of the saved HTML file; the
 *                             map link refers to "<filename>.csv".
 * @param int    $tag          $NO_TAG prints the content as-is; $LOCUST_TEMPLE runs it
 *                             through findLocustTempleDescription(). Any other value
 *                             omits the content cell.
 * @return void
 */
function printTable($array, $keywordArray, $filename, $tag)
{
    global $NO_TAG, $LOCUST_TEMPLE;
    global $system_root_url, $server_host, $lgserver_url;

    $array = (array) $array;
    $keywordArray = (array) $keywordArray;

    $header = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<link href="../search.css" type="text/css" rel="stylesheet"/>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js" type="text/javascript"></script>
<script src="../search.js" charset="utf-8"></script>
</head>
<body>';

    $keywordStr = implode(", ", $keywordArray);
    // Keywords are user input: escape them for the HTML and URL contexts they land in.
    $keywordHtml = htmlspecialchars($keywordStr, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $filenameHtml = htmlspecialchars($filename, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    $fp = fopen("./search_results/" . $filename . ".html", "w");
    // If the file cannot be opened, keep rendering to the browser and skip the copy.
    $toFile = function ($html) use ($fp) {
        if ($fp) {
            fwrite($fp, $html);
        }
    };
    $toBoth = function ($html) use ($toFile) {
        echo $html;
        $toFile($html);
    };

    $toFile($header);

    $mapLink = "<a href='" . $server_host . "LGMap/get_coordinates_for_listed_books.php?file="
        . rawurlencode($filename . ".csv") . "&amp;name=" . rawurlencode($keywordStr)
        . "' target='_blank'>view the distribution on the map</a><br><br>";
    $toFile($mapLink);

    // Only the browser output links to the saved HTML version.
    echo $mapLink . "<a href='" . $system_root_url . "search_results/" . $filenameHtml
        . ".html' target='_blank'>html version</a><br><br>";

    $resultCount = sizeof($array);
    echo $resultCount . " result(s) of \"" . $keywordHtml . "\" ";
    $toFile($resultCount . " result(s) of \"" . $keywordHtml . "\"<br>");

    $toBoth(
        "<table>"
        . "<tr>"
        . "<td class='sequence'>#<td class='bookId'>book id<td class='bookName'>book name"
        . "<td class='level1'>level1<td class='level2'>level2<td class='period'>period"
        . "<td class='sectionName'>section info<td class='page'>page<td class='content'>content"
    );

    // NOTE(review): database-sourced fields below are emitted unescaped, exactly as
    // before; confirm whether stored text may contain intentional markup before
    // adding htmlspecialchars() here.
    $sequence = 1;
    foreach ($array as $row) {
        $toBoth(
            "<tr>"
            . "<td>" . $sequence
            . "<td>" . $row['BOOK_ID']
            . "<td>" . $row['Name']
            . "<td>" . $row['LEVEL1']
            . "<td>" . $row['LEVEL2']
            . "<td>" . $row['PERIOD']
            . "<td>"
        );

        foreach ($row['SECTION'] as $section) {
            // TODO: config to current extraction-interface OR Toc OR not at all??
            // redirect to LGServer (an earlier version linked to
            // /interface/tagging_text.php?id=<section id> instead)
            $toBoth(
                "<div class='section'><a href='" . $lgserver_url . "' target='_blank'>" . $section['name']
                . "</a>p" . $section['start_page'] . "-" . $section['end_page'] . "</div>"
            );
        }

        $cells = "<td>" . $row['PAGE'];
        if ($tag == $NO_TAG) {
            $cells .= "<td>" . $row['CONTENT'];
        } elseif ($tag == $LOCUST_TEMPLE) {
            $cells .= "<td>" . findLocustTempleDescription($row['CONTENT'], $keywordArray, 0);
        }
        $toBoth($cells);

        $sequence++;
    }

    echo "</table>";
    $toFile("</table></body></html>");
    if ($fp) {
        fclose($fp);
    }
}

/**
 * Mark up a text with <keyword> and <irrelevant> tags.
 *
 * Mode 0 ($findUnknownTemple == 0): locate the LAST keyword occurrence (the leading
 * greedy .* makes preg_match land on it), recurse on the text before it in mode 0,
 * wrap the keyword in <keyword>, and process the text after it in mode 1. Text
 * without any keyword is wrapped in <irrelevant> as a whole.
 *
 * Mode 1: locate the last whitespace-preceded run of 1-5 non-space characters
 * followed by one of 廟/寺/祠, wrap it and everything after it in <irrelevant>, and
 * recurse on the text before it. Text without such a run is returned unchanged.
 *
 * @param string $str               Text to annotate.
 * @param array  $keywordArray      Keywords, matched literally.
 * @param int    $findUnknownTemple 0 for keyword mode, 1 for unknown-temple mode.
 * @return string Annotated text.
 */
function findLocustTempleDescription($str, $keywordArray, $findUnknownTemple)
{
    if ($findUnknownTemple == 0) {
        $alternatives = [];
        foreach ((array) $keywordArray as $keyword) {
            $keyword = (string) $keyword;
            if ($keyword !== '') {
                // Keywords are literal text, not regex fragments.
                $alternatives[] = preg_quote($keyword, '/');
            }
        }

        // Without a non-empty keyword the pattern would match the empty string and
        // this function would recurse on the same input forever.
        if (!$alternatives) {
            return "<irrelevant>" . $str . "</irrelevant>";
        }

        // Groups: 1 = text before, 2 = keyword, 3 = text after. The "s" modifier
        // lets "." cross newlines so no part of a multi-line text is dropped.
        $keywordPattern = "/(.*)(" . implode("|", $alternatives) . ")(.*)/us";
        if (preg_match($keywordPattern, $str, $match) !== 1) {
            return "<irrelevant>" . $str . "</irrelevant>";
        }

        return findLocustTempleDescription($match[1], $keywordArray, 0)
            . "<keyword>" . $match[2] . "</keyword>"
            . findLocustTempleDescription($match[3], $keywordArray, 1);
    }

    // Groups: 1 = text before, 2 = unknown temple name, 3 = text after.
    $unknownTemplePattern = "/(.*\s+)(\S{1,5}[廟寺祠])(.*)/us";
    if (preg_match($unknownTemplePattern, $str, $match) !== 1) {
        return $str;
    }

    return findLocustTempleDescription($match[1], $keywordArray, 1)
        . "<irrelevant>" . $match[2] . $match[3] . "</irrelevant>";
}

/**
 * Export search hits to ./csv_files/<fileName>.csv.
 *
 * Columns: BOOK_ID, LEVEL1, LEVEL2, Name, PERIOD, TimeSpan:begin, TimeSpan:end, PAGE,
 * SECTION, CONTENT, Description. Name is prefixed with "(PERIOD) ", Description is
 * "VOLUME ╱ AUTHOR ╱ EDITION", and SECTION lists "name start-end" entries joined by
 * " ╱ " (no trailing separator).
 *
 * @param array  $array    Rows as returned by search().
 * @param string $fileName Base name (no extension) of the CSV file.
 * @return void
 */
function writeCsvFile($array, $fileName)
{
    $columnNameArray = [
        'BOOK_ID', 'LEVEL1', 'LEVEL2', 'Name', 'PERIOD', 'TimeSpan:begin',
        'TimeSpan:end', 'PAGE', 'SECTION', 'CONTENT', 'Description',
    ];

    $fp = fopen("./csv_files/" . $fileName . ".csv", "w");
    if (!$fp) {
        // fopen() has already raised a warning; there is nothing to write to.
        return;
    }

    fputcsv($fp, $columnNameArray);

    foreach ((array) $array as $row) {
        $row['Description'] = '';

        // Seed every column in header order so fputcsv() emits them in that order.
        $book = [];
        foreach ($columnNameArray as $column) {
            $book[$column] = $row[$column];
        }

        $book['Name'] = "(" . $row['PERIOD'] . ") " . $row['Name'];

        // NOTE(review): as written the first replacement is a no-op; it may originally
        // have targeted full-width parentheses -- confirm against the repository.
        $row['AUTHOR'] = str_replace("(", "(", $row['AUTHOR']);
        $row['AUTHOR'] = str_replace(")", ") ", $row['AUTHOR']);
        $book['Description'] = $row['VOLUME'] . " ╱ " . $row['AUTHOR'] . " ╱ " . $row['EDITION'];

        $sectionLabels = [];
        foreach ($row['SECTION'] as $section) {
            $sectionLabels[] = $section['name'] . " " . $section['start_page'] . "-" . $section['end_page'];
        }
        // implode() puts the separator only between entries; the old index check
        // was always true and left a dangling " ╱ " after the last section.
        $book['SECTION'] = implode(" ╱ ", $sectionLabels);

        fputcsv($fp, $book);
    }

    fclose($fp);
}
?>