diff search/search_function.php @ 0:b12c99b7c3f0

commit for previous development
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Mon, 19 Jan 2015 17:13:49 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/search/search_function.php	Mon Jan 19 17:13:49 2015 +0100
@@ -0,0 +1,204 @@
+<?php
+
+include_once('../interface/Lib_mb_utf8.php');
+include_once('../interface/config.php');
+
+// Remove the execution-time and memory limits for this script.
+set_time_limit(0);
+ini_set('memory_limit', '-1');
+
+// $mysql_server, $mysql_user, $mysql_password and $mysql_database are not
+// defined in this file; presumably they come from ../interface/config.php.
+$link_mysql = mysql_connect($mysql_server, $mysql_user, $mysql_password);
+if (!$link_mysql) {
+	die('Could not connect: ' . mysql_error());
+}
+
+// Set the connection charset only once the link is known to be valid.
+// mysql_set_charset() also tells the client library about the charset, which
+// mysql_real_escape_string() relies on.
+mysql_set_charset('utf8', $link_mysql);
+
+$db_selected = mysql_select_db($mysql_database, $link_mysql);
+if (!$db_selected) {
+	die ('Can\'t use database : ' . mysql_error($link_mysql));
+}
+/**
+ * Split a comma-separated keyword string into a list of trimmed keywords.
+ *
+ * Empty and whitespace-only entries are dropped. Entries such as "0" are
+ * kept: the comparison is against the empty string, not a loose test
+ * against false.
+ *
+ * @param string $keywords Comma-separated keywords, e.g. "a, b,c".
+ * @return array List of non-empty, trimmed keywords; an empty array if none.
+ */
+function trimKeyword($keywords){
+	$keywordArray=array();
+	foreach(explode(",",(string)$keywords) as $token){
+		$token=trim($token);
+		if($token!==""){
+			$keywordArray[]=$token;
+		}
+	}
+	return $keywordArray;
+}
+
+/**
+ * Find every content line that contains at least one of the given keywords.
+ *
+ * Each returned row carries the aliased columns of the main query
+ * (LEVEL1, LEVEL2, Name, PERIOD, TimeSpan:begin, TimeSpan:end, BOOK_ID, PAGE,
+ * CONTENT, VOLUME, AUTHOR, EDITION) plus a 'SECTION' key holding the list of
+ * sections (id, name, start_page, end_page) of that book whose page range
+ * includes PAGE.
+ *
+ * Uses the most recently opened mysql_* connection.
+ *
+ * @param array $keywordArray Keywords; a row matches if its content contains any of them.
+ * @return array List of result rows; an empty array when there are no keywords,
+ *               no matches, or the main query fails.
+ */
+function search($keywordArray){
+	$resultArray=array();
+	// Without keywords the WHERE clause would be empty, which is invalid SQL.
+	if(!is_array($keywordArray) || count($keywordArray)==0){
+		return $resultArray;
+	}
+
+	$likeClauses=array();
+	foreach($keywordArray as $keyword){
+		// Escape the keyword so quotes or backslashes in it cannot alter the statement.
+		// LIKE wildcards (% and _) are deliberately left untouched.
+		$likeClauses[]=" contents.content LIKE  '%".mysql_real_escape_string($keyword)."%' ";
+	}
+	$condition=implode(" OR ",$likeClauses);
+
+	$query="SELECT books.level1 AS LEVEL1, books.level2 AS LEVEL2,
+                        books.name AS Name, books.period AS PERIOD,
+                        books.start_year AS 'TimeSpan:begin', books.end_year AS 'TimeSpan:end',
+                        books.id AS BOOK_ID, contents.line AS PAGE, contents.content AS CONTENT,
+                	info.volume AS VOLUME, info.author AS AUTHOR, info.edition AS EDITION
+		FROM contents
+		JOIN books ON contents.books_id = books.id
+                JOIN books_info info ON contents.books_id=info.books_id
+		WHERE ".$condition."
+		ORDER BY contents.books_id, contents.line";
+	$result = mysql_query($query);
+	if($result===false){
+		trigger_error("search: query failed: ".mysql_error(),E_USER_WARNING);
+		return $resultArray;
+	}
+
+	while ($row = mysql_fetch_assoc($result)) {
+		// Find the section(s) of this book whose page range contains the hit.
+		$subQuery="SELECT id, name, start_page, end_page 
+			FROM sections
+			WHERE books_id=".$row['BOOK_ID']." AND start_page<=".$row['PAGE']." AND end_page>=".$row['PAGE'];
+		$subResult=mysql_query($subQuery);
+		$sectionArray=array();
+		// A failed section lookup leaves the row with an empty section list.
+		if($subResult!==false){
+			while($subRow=mysql_fetch_assoc($subResult)){
+				$sectionArray[]=$subRow;
+			}
+			mysql_free_result($subResult);
+		}
+		$row['SECTION']=$sectionArray;
+		$resultArray[]=$row;
+	}
+	mysql_free_result($result);
+	return $resultArray;
+}
+// Values for the $tag argument of printTable(): how the content cell is rendered.
+$NO_TAG=0;		// print the content as stored
+$LOCUST_TEMPLE=1;	// pass the content through findLocustTempleDescription()
+
+/**
+ * Print search results as an HTML table and save a standalone copy of the page.
+ *
+ * The links, the result count and the table are echoed to the current output.
+ * The same table, wrapped in a complete HTML document, is written to
+ * search_results/<filename>.html (that copy omits the "html version" link,
+ * which points at the file itself).
+ *
+ * @param array  $array        Result rows with the keys BOOK_ID, Name, LEVEL1, LEVEL2,
+ *                             PERIOD, PAGE, CONTENT and SECTION (list of arrays with
+ *                             id, name, start_page, end_page).
+ * @param array  $keywordArray Keywords that produced the results.
+ * @param string $filename     Base name (no extension) of the .html file to write and
+ *                             of the .csv file referenced by the map link.
+ * @param int    $tag          $NO_TAG or $LOCUST_TEMPLE; any other value prints no content cell.
+ */
+function printTable($array,$keywordArray,$filename,$tag){ //print HTML
+	global $NO_TAG, $LOCUST_TEMPLE;
+	$header='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+		<html>
+		<head>
+		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+		<link href="../search.css" type="text/css" rel="stylesheet"/>
+		<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js" type="text/javascript"></script>
+		<script src="../search.js" charset="utf-8"></script>
+		</head>
+		<body>';
+	if(!is_array($array)){
+		$array=array();
+	}
+	$keywordStr=implode(", ",(array)$keywordArray);
+	// Keywords are caller-supplied text: escape them for HTML and encode them for URLs.
+	$keywordHtml=htmlspecialchars($keywordStr,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8');
+
+	$baseUrl="http://".$_SERVER['HTTP_HOST'];
+	$mapUrl=$baseUrl."/map/get_coordinates_for_listed_books.php?file=".rawurlencode($filename.".csv")."&name=".rawurlencode($keywordStr);
+	$htmlUrl=$baseUrl."/search/search_results/".rawurlencode($filename).".html";
+	$mapLink="<a href='".htmlspecialchars($mapUrl,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8')."' target='_blank'>view the distribution on the map</a><br><br>";
+	$htmlLink="<a href='".htmlspecialchars($htmlUrl,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8')."' target='_blank'>html version</a><br><br>";
+
+	// NOTE(review): $filename is used in a filesystem path as given; callers must not pass untrusted values.
+	$fp=fopen("search_results/".$filename.".html","w");
+	if($fp===false){
+		// Keep rendering to the page even if the saved copy cannot be created.
+		trigger_error("printTable: cannot open search_results/".$filename.".html for writing",E_USER_WARNING);
+	}
+	// Write to the saved copy only.
+	$toFile=function($html) use ($fp){
+		if($fp!==false){
+			fwrite($fp,$html);
+		}
+	};
+	// Write to the saved copy and to the current output.
+	$toBoth=function($html) use ($toFile){
+		$toFile($html);
+		echo $html;
+	};
+
+	$toFile($header);
+	$toFile($mapLink);
+	echo $mapLink.$htmlLink;
+
+	$summary=sizeof($array)." result(s) of \"".$keywordHtml."\"";
+	echo $summary." ";
+	$toFile($summary."<br>");
+
+	$str="<table>";
+	$str.="<tr>";
+	$str.="<td class='sequence'>#<td class='bookId'>book id<td class='bookName'>book name<td class='level1'>level1<td class='level2'>level2<td class='period'>period<td class='sectionName'>section info<td class='page'>page<td class='content'>content";
+	$toBoth($str);
+
+	// NOTE(review): database fields below are emitted without HTML escaping, as before;
+	// escape them if the stored text is not meant to be rendered as markup.
+	$i=1;
+	foreach($array as $row){
+		$str="<tr>";
+		$str.="<td>".$i;
+		$str.="<td>".$row['BOOK_ID'];
+		$str.="<td>".$row['Name'];
+		$str.="<td>".$row['LEVEL1'];
+		$str.="<td>".$row['LEVEL2'];
+		$str.="<td>".$row['PERIOD'];
+		$str.="<td>";
+		foreach($row['SECTION'] as $section){
+			$str.="<div class='section'><a href='/interface/tagging_text.php?id=".$section['id']."' target='_blank'>".$section['name']."</a>p".$section['start_page']."-".$section['end_page']."</div>";
+		}
+		$str.="<td>".$row['PAGE'];
+		if($tag==$NO_TAG){
+			$str.="<td>".$row['CONTENT'];
+		}else if($tag==$LOCUST_TEMPLE){
+			$str.="<td>".findLocustTempleDescription($row['CONTENT'],$keywordArray,0);
+		}
+		$toBoth($str);
+		$i++;
+	}
+
+	echo "</table>";
+	$toFile("</table></body></html>");
+	if($fp!==false){
+		fclose($fp);
+	}
+}
+
+/**
+ * Mark up a text around keyword hits with <keyword> and <irrelevant> tags.
+ *
+ * Mode 0 ($findUnknownTemple == 0): looks for the last keyword occurrence in
+ * $str. The text before it is processed again in mode 0, the keyword is wrapped
+ * in <keyword>, and the text after it is processed in mode 1. If no keyword
+ * occurs, the whole string is wrapped in <irrelevant>.
+ *
+ * Mode 1 (any other value): looks for whitespace followed by 1-5 non-space
+ * characters ending in one of 廟, 寺, 祠. From that name onwards the text is
+ * wrapped in <irrelevant>; the text before it is processed again in mode 1.
+ * If there is no such name, the string is returned unchanged.
+ *
+ * @param string $str               Text to annotate.
+ * @param array  $keywordArray      Literal keywords (not regular expressions).
+ * @param int    $findUnknownTemple 0 to search for keywords, 1 to search for another temple name.
+ * @return string Annotated text.
+ */
+function findLocustTempleDescription($str,$keywordArray,$findUnknownTemple){
+	if($findUnknownTemple==0){
+		// Keywords are literal text: quote regex metacharacters, and skip empty
+		// keywords, which would match the empty string and recurse forever.
+		$alternatives=array();
+		foreach((array)$keywordArray as $keyword){
+			if((string)$keyword!==""){
+				$alternatives[]=preg_quote($keyword,"/");
+			}
+		}
+		if(count($alternatives)==0){
+			return "<irrelevant>".$str."</irrelevant>";
+		}
+		//0=whole,1=text,2=locust temple,3=description
+		// The leading greedy (.*) lands the match on the LAST occurrence; earlier ones
+		// are handled by recursing on group 1. The s modifier lets . cross line
+		// breaks so no part of a multi-line text is dropped.
+		$pattern="/(.*)(".implode("|",$alternatives).")(.*)/us";
+		if(preg_match($pattern,$str,$match)!=1){//no string "locust temple"
+			return "<irrelevant>".$str."</irrelevant>";
+		}
+		$result=findLocustTempleDescription($match[1],$keywordArray,0);
+		$result.="<keyword>".$match[2]."</keyword>";
+		$result.=findLocustTempleDescription($match[3],$keywordArray,1);
+		return $result;
+	}
+
+	//0=whole,1=description,2=unknown temple,3=text
+	$pattern="/(.*\s+)(\S{1,5}[廟寺祠])(.*)/us";
+	if(preg_match($pattern,$str,$match)!=1){//can't find the pattern
+		return $str;
+	}
+	//found the pattern "locust temple......unknown temple"
+	$result=findLocustTempleDescription($match[1],$keywordArray,1);
+	$result.="<irrelevant>".$match[2].$match[3]."</irrelevant>";
+	return $result;
+}
+
+/**
+ * Write search results to ./csv_files/<fileName>.csv.
+ *
+ * The first line holds the column names. Each result row becomes one record in
+ * which 'Name' is prefixed with "(PERIOD) ", 'SECTION' is the list of sections
+ * formatted as "name start-end" and joined with " ╱ ", and 'Description' is
+ * "VOLUME ╱ AUTHOR ╱ EDITION".
+ *
+ * @param array  $array    Result rows with the keys listed in the column header plus
+ *                         VOLUME, AUTHOR, EDITION and SECTION (list of arrays with
+ *                         name, start_page, end_page).
+ * @param string $fileName Base name (no extension) of the CSV file.
+ * @return void Raises an E_USER_WARNING and writes nothing if the file cannot be opened.
+ */
+function writeCsvFile($array,$fileName){
+	$columnNameArray=['BOOK_ID','LEVEL1','LEVEL2',
+        	'Name','PERIOD','TimeSpan:begin','TimeSpan:end','PAGE','SECTION','CONTENT',
+        	'Description'];
+	$fp=fopen("./csv_files/".$fileName.".csv","w");
+	if($fp===false){
+		trigger_error("writeCsvFile: cannot open ./csv_files/".$fileName.".csv for writing",E_USER_WARNING);
+		return;
+	}
+	fputcsv($fp,$columnNameArray);
+	if(!is_array($array)){
+		$array=array();
+	}
+	foreach($array as $row){
+		$book=array();
+		// 'Description' is not a query column; seed it so the column copy below finds the key.
+		$row['Description']='';
+		foreach($columnNameArray as $column){
+			$book[$column]=$row[$column];
+		}
+		$book['Name']="(".$row['PERIOD'].") ".$row['Name'];
+		// NOTE(review): as written this first replacement maps "(" to itself; it may
+		// have been meant for a different (e.g. full-width) parenthesis - confirm.
+        	$row['AUTHOR']=str_replace("(","(",$row['AUTHOR']);
+        	$row['AUTHOR']=str_replace(")",") ",$row['AUTHOR']);
+        	$book['Description']=$row['VOLUME']." ╱ ".$row['AUTHOR']." ╱ ".$row['EDITION'];
+		// Join sections with the separator between entries only, never after the last one.
+		$sectionLabels=array();
+		foreach($row['SECTION'] as $section){
+			$sectionLabels[]=$section['name']." ".$section['start_page']."-".$section['end_page'];
+		}
+		$book['SECTION']=implode(" ╱ ",$sectionLabels);
+		fputcsv($fp,$book);
+	}
+	fclose($fp);
+}
+
+?>
+