Mercurial > hg > LGSearch
diff search_function.php @ 0:c9363a90b8b5
first commit to development server
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 24 Mar 2015 15:12:34 +0100 |
parents | |
children | 38851c894301 |
line wrap: on
line diff
<?php
// Origin: hg diff --- /dev/null +++ b/search_function.php (Tue Mar 24 15:12:34 2015 +0100), @@ -0,0 +1,233 @@
//
// Keyword search over the `contents` / `books` tables, plus HTML and CSV
// rendering of the hits.
//
// NOTE(review): this file relies on the legacy mysql_* extension. It is kept
// because $link_mysql is a file-level global that other scripts may share;
// migrating to mysqli/PDO has to be done for the whole project at once.

include_once('config/Lib_mb_utf8.php');
include_once('config/config.php');

// Searches can be long and large: lift the time and memory limits.
set_time_limit(0);
ini_set('memory_limit', '-1');

$link_mysql = mysql_connect($mysql_server, $mysql_user, $mysql_password);
if (!$link_mysql) {
    die('Could not connect: ' . mysql_error());
}
// Set the charset only once the connection is known to be valid.
// mysql_set_charset() also tells the client library which charset is in use,
// which mysql_real_escape_string() depends on.
mysql_set_charset('utf8', $link_mysql);

$db_selected = mysql_select_db($mysql_database, $link_mysql);
if (!$db_selected) {
    die('Can\'t use database ' . $mysql_database . ' : ' . mysql_error());
}

/**
 * Split a comma-separated keyword string into a list of trimmed keywords.
 *
 * Empty and whitespace-only entries are dropped. A keyword of "0" is kept.
 *
 * @param string $keywords comma-separated keywords
 * @return array list of non-empty keywords (empty array when there are none)
 */
function trimKeyword($keywords){
    $keywordArray = array();
    foreach (explode(",", $keywords) as $token) {
        $token = trim($token);
        if ($token !== '') {
            $keywordArray[] = $token;
        }
    }
    return $keywordArray;
}

/**
 * Find every content line that contains at least one of the keywords.
 *
 * Each returned row carries the selected book/content columns (LEVEL1,
 * LEVEL2, Name, PERIOD, TimeSpan:begin, TimeSpan:end, BOOK_ID, PAGE, CONTENT,
 * VOLUME, AUTHOR, EDITION) plus a 'SECTION' entry: the list of sections whose
 * page range covers the row's PAGE. Sections come from `sections_revisions`
 * (latest version, not deleted) when the book has any `sections_versions`
 * row, and from `sections` otherwise.
 *
 * Echoes "result length: N<br>" (and the MySQL error text on failure).
 *
 * @param array $keywordArray list of keywords; '%' and '_' inside a keyword
 *                            still act as LIKE wildcards, as before
 * @return array list of result rows (empty array when nothing matches)
 */
function search($keywordArray){
    $resultArray = array();

    $likeClauses = array();
    foreach ($keywordArray as $keyword) {
        if ((string)$keyword === '') {
            continue; // LIKE '%%' would match the whole table
        }
        // Keywords are user input: escape them before building the SQL string.
        $likeClauses[] = " contents.content LIKE '%".mysql_real_escape_string($keyword)."%' ";
    }
    if (!$likeClauses) {
        // No usable keyword: "WHERE ORDER BY" would be invalid SQL.
        echo "result length: 0<br>";
        return $resultArray;
    }
    $condition = implode(" OR ", $likeClauses);

    // TODO: book_info is now merged to books table
    $query="SELECT books.level1 AS LEVEL1, books.level2 AS LEVEL2,
            books.name AS Name, books.period AS PERIOD,
            books.start_year AS 'TimeSpan:begin', books.end_year AS 'TimeSpan:end',
            books.id AS BOOK_ID, contents.line AS PAGE, contents.content AS CONTENT,
            books.volume AS VOLUME, books.author AS AUTHOR, books.edition AS EDITION
            FROM contents
            JOIN books ON contents.books_id = books.id
            WHERE ".$condition."
            ORDER BY contents.books_id, contents.line";
    $result = mysql_query($query);
    if (!$result) {
        echo mysql_error();
        echo "result length: 0<br>";
        return $resultArray;
    }
    echo "result length: ".mysql_num_rows($result)."<br>";

    // Rows are ordered by book, so the latest version id per book is looked up once.
    $maxVersionByBook = array();

    while ($row = mysql_fetch_assoc($result)) { // find the section(s) the page belongs to
        // The original interpolated these values unquoted, which only yields
        // valid SQL for numbers. NOTE(review): assumes ids and page/line
        // numbers are integers -- confirm against the schema.
        $bookId = (int)$row['BOOK_ID'];
        $page = (int)$row['PAGE'];

        if (!array_key_exists($bookId, $maxVersionByBook)) {
            $maxVersionByBook[$bookId] = 0;
            $versionResult = mysql_query("SELECT MAX(`id`) FROM `sections_versions` WHERE `books_id` ='".$bookId."'");
            if ($versionResult) {
                $r = mysql_fetch_row($versionResult);
                if ($r && $r[0]) {
                    $maxVersionByBook[$bookId] = (int)$r[0];
                }
            }
        }
        $max_version_id = $maxVersionByBook[$bookId];

        if ($max_version_id) {
            $subQuery="SELECT id, name, start_page, end_page
                    FROM sections_revisions
                    WHERE books_id=".$bookId." AND start_page<=".$page." AND end_page>=".$page." AND sections_revisions.deleted=0 AND sections_revisions.versions_id=".$max_version_id;
        } else {
            $subQuery="SELECT id, name, start_page, end_page
                    FROM sections
                    WHERE books_id=".$bookId." AND start_page<=".$page." AND end_page>=".$page;
        }

        $sectionArray = array();
        $subResult = mysql_query($subQuery);
        if ($subResult) {
            while ($subRow = mysql_fetch_assoc($subResult)) {
                $sectionArray[] = $subRow;
            }
        }
        $row['SECTION'] = $sectionArray;

        $resultArray[] = $row;
    }
    return $resultArray;
}

// Values accepted by the $tag parameter of printTable().
$NO_TAG=0;
$LOCUST_TEMPLE=1;

/**
 * Private helper of printTable(): echo $html to the page and, when the result
 * file could be opened, append it to that file as well.
 */
function _printTableEmit($fp, $html){
    echo $html;
    if ($fp) {
        fwrite($fp, $html);
    }
}

/**
 * Print the search results as an HTML table to the page and write the same
 * table to ./search_results/<filename>.html.
 *
 * The page additionally gets a link to that HTML file; the file additionally
 * gets a full HTML header and closing tags.
 *
 * NOTE(review): values read from the database (book name, section name,
 * content, ...) are still emitted unescaped, as in the original; confirm
 * whether they may legitimately contain markup before escaping them.
 *
 * @param array  $array        rows as returned by search()
 * @param array  $keywordArray keywords that were searched for
 * @param string $filename     base name (no extension) of the result files
 * @param int    $tag          $NO_TAG prints the raw content, $LOCUST_TEMPLE
 *                             prints it through findLocustTempleDescription()
 */
function printTable($array,$keywordArray,$filename,$tag){ //print HTML
    global $NO_TAG, $LOCUST_TEMPLE;
    global $system_root_url, $server_host, $lgserver_url;

    $header='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
    <html>
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
    <link href="./search.css" type="text/css" rel="stylesheet"/>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js" type="text/javascript"></script>
    <script src="./search.js" charset="utf-8"></script>
    </head>
    <body>';

    $keywordStr = implode(", ", $keywordArray);
    // Keywords are user input: escape them for the HTML text and for the URL.
    $keywordHtml = htmlspecialchars($keywordStr, ENT_QUOTES, 'UTF-8');
    $filenameHtml = htmlspecialchars($filename, ENT_QUOTES, 'UTF-8');

    // fopen() already raises a warning on failure; the page output below is
    // still produced, only the file copy is skipped.
    $fp = fopen("./search_results/".$filename.".html","w");
    if ($fp) {
        fwrite($fp, $header);
    }

    // The map link goes to both outputs, the "html version" link only to the page.
    $mapLink="<a href='".$server_host."LGMap/get_coordinates_for_listed_books.php?file=".urlencode($filename.".csv")."&name=".urlencode($keywordStr)."' target='_blank'>view the distribution on the map</a><br><br>";
    _printTableEmit($fp, $mapLink);
    echo "<a href='".$system_root_url."search_results/".$filenameHtml.".html' target='_blank'>html version</a><br><br>";

    echo sizeof($array)." result(s) of \"".$keywordHtml."\" ";
    if ($fp) {
        fwrite($fp, sizeof($array)." result(s) of \"".$keywordHtml."\"<br>");
    }

    $str="<table>";
    $str.="<tr>";
    $str.="<td class='sequence'>#<td class='bookId'>book id<td class='bookName'>book name<td class='level1'>level1<td class='level2'>level2<td class='period'>period<td class='sectionName'>section info<td class='page'>page<td class='content'>content";
    _printTableEmit($fp, $str);

    $i=1;
    foreach($array as $row){
        $str="<tr>";
        $str.="<td>".$i;
        $str.="<td>".$row['BOOK_ID'];
        $str.="<td>".$row['Name'];
        $str.="<td>".$row['LEVEL1'];
        $str.="<td>".$row['LEVEL2'];
        $str.="<td>".$row['PERIOD'];
        $str.="<td>";
        _printTableEmit($fp, $str);

        foreach($row['SECTION'] as $section){
            // TODO: config to current extraction-interface OR Toc OR not at all??
            // redirect to LGServer
            $str="<div class='section'><a href='".$lgserver_url."' target='_blank'>".$section['name']."</a>p".$section['start_page']."-".$section['end_page']."</div>";
            _printTableEmit($fp, $str);
        }

        $str="<td>".$row['PAGE'];
        if($tag==$NO_TAG){
            $str.="<td>".$row['CONTENT'];
        }else if($tag==$LOCUST_TEMPLE){
            $str.="<td>".findLocustTempleDescription($row['CONTENT'],$keywordArray,0);
        }
        _printTableEmit($fp, $str);
        $i++;
    }

    echo "</table>";
    if ($fp) {
        fwrite($fp,"</table></body></html>");
        fclose($fp);
    }
}

/**
 * Mark up a content string with <keyword> and <irrelevant> tags.
 *
 * Mode 0 ($findUnknownTemple == 0): locate the LAST keyword occurrence in
 * $str (the leading ".*" is greedy). The text before it is processed again in
 * mode 0, the keyword is wrapped in <keyword>, and the text after it is
 * processed in mode 1. Without any keyword the whole string is wrapped in
 * <irrelevant>.
 *
 * Mode 1: locate the last "whitespace + 1-5 non-space characters + one of
 * 廟/寺/祠" in $str. That name and everything after it is wrapped in
 * <irrelevant>; the text before it is processed again in mode 1. Without such
 * a name the string is returned unchanged.
 *
 * @param string $str               text to mark up
 * @param array  $keywordArray      keywords, matched literally
 * @param int    $findUnknownTemple 0 for keyword mode, anything else for mode 1
 * @return string marked-up text
 */
function findLocustTempleDescription($str,$keywordArray,$findUnknownTemple){
    if($findUnknownTemple==0){
        $alternatives = array();
        foreach($keywordArray as $keyword){
            // An empty alternative matches everywhere and would recurse forever.
            if((string)$keyword !== ''){
                // Keywords are plain text, not regex fragments.
                $alternatives[] = preg_quote($keyword, '/');
            }
        }
        if(!$alternatives){
            return "<irrelevant>".$str."</irrelevant>";
        }
        // Groups: 1=text before, 2=keyword, 3=description after.
        // The s modifier lets "." cross line breaks so no text is dropped.
        $pattern = "/(.*)(".implode("|", $alternatives).")(.*)/us";
        if(preg_match($pattern,$str,$match)!==1){ // no keyword in the string
            return "<irrelevant>".$str."</irrelevant>";
        }
        $result=findLocustTempleDescription($match[1],$keywordArray,0);
        $result.="<keyword>".$match[2]."</keyword>";
        $result.=findLocustTempleDescription($match[3],$keywordArray,1);
        return $result;
    }

    // Groups: 1=description, 2=unknown temple name, 3=text after it.
    $pattern = "/(.*\s+)(\S{1,5}[廟寺祠])(.*)/us";
    if(preg_match($pattern,$str,$match)!==1){ // no other temple mentioned
        return $str;
    }
    $result=findLocustTempleDescription($match[1],$keywordArray,1);
    $result.="<irrelevant>".$match[2].$match[3]."</irrelevant>";
    return $result;
}

/**
 * Write the search results to ./csv_files/<fileName>.csv.
 *
 * Columns: BOOK_ID, LEVEL1, LEVEL2, Name, PERIOD, TimeSpan:begin,
 * TimeSpan:end, PAGE, SECTION, CONTENT, Description. Name is prefixed with
 * "(PERIOD) ", SECTION lists "name start-end" per section, and Description is
 * "VOLUME ╱ AUTHOR ╱ EDITION". Nothing is written when the file cannot be
 * opened.
 *
 * @param array  $array    rows as returned by search()
 * @param string $fileName base name (no extension) of the CSV file
 */
function writeCsvFile($array,$fileName){
    $columnNameArray=['BOOK_ID','LEVEL1','LEVEL2',
        'Name','PERIOD','TimeSpan:begin','TimeSpan:end','PAGE','SECTION','CONTENT',
        'Description'];
    $fp=fopen("./csv_files/".$fileName.".csv","w");
    if(!$fp){
        return; // fopen() has already raised a warning
    }
    fputcsv($fp,$columnNameArray);
    foreach($array as $row){
        $row['Description']='';
        // Build the record in header order so fputcsv() emits matching columns.
        $book=array();
        foreach($columnNameArray as $column){
            $book[$column]=$row[$column];
        }
        $book['Name']="(".$row['PERIOD'].") ".$row['Name'];
        // NOTE(review): as written this first replacement is a no-op; a
        // full-width parenthesis may have been intended -- confirm.
        $row['AUTHOR']=str_replace("(","(",$row['AUTHOR']);
        $row['AUTHOR']=str_replace(")",") ",$row['AUTHOR']);
        $book['Description']=$row['VOLUME']." ╱ ".$row['AUTHOR']." ╱ ".$row['EDITION'];

        // Join with the separator between entries only; the old index check
        // was always true and left a trailing separator.
        $sectionLabels=array();
        foreach($row['SECTION'] as $section){
            $sectionLabels[]=$section['name']." ".$section['start_page']."-".$section['end_page'];
        }
        $book['SECTION']=implode(" ╱ ",$sectionLabels);

        fputcsv($fp,$book);
    }
    fclose($fp);
}