view search_function.php @ 1:38851c894301

path config
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 24 Mar 2015 16:27:18 +0100
parents c9363a90b8b5
children 45cbafdec56e
line wrap: on
line source

<?php

include_once('config/Lib_mb_utf8.php');
include_once('config/config.php');

// Searches can be long-running and return large result sets, so the script
// lifts both the execution-time and the memory limit.
set_time_limit(0);
ini_set('memory_limit', '-1');

// Open the MySQL connection using the credentials loaded from config/config.php.
$link_mysql = mysql_connect($mysql_server, $mysql_user, $mysql_password);

// Verify the connection BEFORE issuing any query on it.
if (!$link_mysql) {
	die('Could not connect: ' . mysql_error());
}

// Exchange data with the server as UTF-8.
mysql_query("SET NAMES utf8", $link_mysql);

$db_selected = mysql_select_db($mysql_database, $link_mysql);
if (!$db_selected) {
	die ('Can\'t use database : ' . mysql_error());
}
/**
 * Split a comma-separated keyword string into a list of trimmed keywords.
 *
 * Empty pieces (consecutive commas, leading/trailing commas, whitespace-only
 * pieces) are dropped, so the result never contains an empty keyword.
 *
 * @param string $keywords Comma-separated keywords, e.g. "a, b ,c".
 * @return array List of trimmed, non-empty keywords; an empty array when
 *               the input contains none.
 */
function trimKeyword($keywords){
	$keywordArray=array();
	foreach(explode(",",(string)$keywords) as $token){
		$token=trim($token);
		// Strict comparison: a keyword such as "0" is valid and must be kept.
		if($token!==''){
			$keywordArray[]=$token;
		}
	}
	return $keywordArray;
}

/**
 * Search the `contents` table for rows whose content contains any of the
 * given keywords, and attach to every hit the section(s) covering its page.
 *
 * For each hit the sections are read from `sections_revisions` (latest
 * version id found in `sections_versions` for the book) when such a version
 * exists, otherwise from `sections`.
 *
 * Side effects: echoes "result length: N<br>" and, on a failed main query,
 * the MySQL error text.
 *
 * @param array $keywordArray List of keywords; they are OR-ed together.
 * @return array List of result rows (keys LEVEL1, LEVEL2, Name, PERIOD,
 *               TimeSpan:begin, TimeSpan:end, BOOK_ID, PAGE, CONTENT, VOLUME,
 *               AUTHOR, EDITION, SECTION). SECTION is a list of rows with
 *               id, name, start_page, end_page. Empty array when nothing
 *               matches, no keyword is given, or the query fails.
 */
function search($keywordArray){
	$resultArray=array();

	// Build the OR-ed LIKE conditions. Keywords are user input, so each one is
	// escaped before being embedded in the SQL string.
	$conditions=array();
	foreach((array)$keywordArray as $keyword){
		$conditions[]=" contents.content LIKE  '%".mysql_real_escape_string($keyword)."%' ";
	}
	if(count($conditions)==0){
		// Without any condition the WHERE clause would be a syntax error.
		echo "result length: 0<br>";
		return $resultArray;
	}
	$condition=implode(" OR ",$conditions);

	// TODO: book_info is now merged to books table
	$query="SELECT books.level1 AS LEVEL1, books.level2 AS LEVEL2,
                        books.name AS Name, books.period AS PERIOD,
                        books.start_year AS 'TimeSpan:begin', books.end_year AS 'TimeSpan:end',
                        books.id AS BOOK_ID, contents.line AS PAGE, contents.content AS CONTENT,
                	books.volume AS VOLUME, books.author AS AUTHOR, books.edition AS EDITION
		FROM contents
		JOIN books ON contents.books_id = books.id
		WHERE ".$condition."
		ORDER BY contents.books_id, contents.line";
	$result = mysql_query($query);
	if (!$result) {
		// Report the error and stop: there is no result set to iterate over.
		echo mysql_error();
		return $resultArray;
	}
	echo "result length: ".mysql_num_rows($result)."<br>";

	while ($row = mysql_fetch_assoc($result)) { //Find the section(s) where the page belongs to
		// Latest sections version recorded for this book, if any.
		$query = "SELECT MAX(`id`) FROM `sections_versions` WHERE `books_id` ='".$row['BOOK_ID']."'";
		$versionResult = mysql_query($query);
		$r = $versionResult ? mysql_fetch_row($versionResult) : false;
		$max_version_id = $r ? $r[0] : null;

		if ($max_version_id) {
			$subQuery="SELECT id, name, start_page, end_page
			FROM sections_revisions
			WHERE books_id=".$row['BOOK_ID']." AND start_page<=".$row['PAGE']." AND end_page>=".$row['PAGE']." AND sections_revisions.deleted=0 AND sections_revisions.versions_id=".$max_version_id;
		} else {
			$subQuery="SELECT id, name, start_page, end_page 
			FROM sections
			WHERE books_id=".$row['BOOK_ID']." AND start_page<=".$row['PAGE']." AND end_page>=".$row['PAGE'];
		}

		$sectionArray=array();
		$subResult=mysql_query($subQuery);
		if($subResult){ // a failed sub-query simply yields no sections for this hit
			while($subRow=mysql_fetch_assoc($subResult)){
				$sectionArray[]=$subRow;
			}
		}
		$row['SECTION']=$sectionArray;

		$resultArray[]=$row;
	}
	return $resultArray;
}
// Tag modes compared against the $tag argument of printTable(), which reads
// them via `global`:
//   $NO_TAG        - the content column is printed as stored
//   $LOCUST_TEMPLE - the content column is first passed through
//                    findLocustTempleDescription()
$NO_TAG=0;
$LOCUST_TEMPLE=1;

/**
 * Print the search results as an HTML table, both to the current response
 * (echo) and to ./search_results/<filename>.html.
 *
 * The saved file additionally gets a full HTML header/footer; the echoed
 * output additionally gets a link to that saved file. If the file cannot be
 * opened, the table is still echoed and only the file copy is skipped.
 *
 * Reads the globals $NO_TAG, $LOCUST_TEMPLE, $system_root_url, $server_host
 * and $lgserver_url.
 *
 * NOTE(review): values coming from the database (book name, section name,
 * content, ...) are emitted unescaped, as before; the content may rely on
 * embedded markup, so confirm before escaping those too.
 *
 * @param array  $array        Result rows as returned by search().
 * @param array  $keywordArray Keywords that were searched for.
 * @param string $filename     Base name (no extension) of the html/csv files.
 * @param int    $tag          $NO_TAG to print content as is, $LOCUST_TEMPLE
 *                             to tag it via findLocustTempleDescription().
 * @return void
 */
function printTable($array,$keywordArray,$filename,$tag){ //print HTML
	global $NO_TAG, $LOCUST_TEMPLE;
	global $system_root_url, $server_host, $lgserver_url;

	$header='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
		<html>
		<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
		<link href="../search.css" type="text/css" rel="stylesheet"/>
		<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.6.2/jquery.min.js" type="text/javascript"></script>
		<script src="../search.js" charset="utf-8"></script>
		</head>
		<body>';

	// The keywords are user input: encode them for the URL query and for HTML
	// text so quotes, '&' or '<' cannot break out of the markup.
	$keywordStr=implode(", ",(array)$keywordArray);
	$keywordUrl=urlencode($keywordStr);
	$keywordHtml=htmlspecialchars($keywordStr,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8');

	$fp=fopen("./search_results/".$filename.".html","w");
	// Small emitters so every fragment is built once; file writes are skipped
	// when the file could not be opened.
	$toFile=function($text) use ($fp){
		if($fp){
			fwrite($fp,$text);
		}
	};
	$toBoth=function($text) use ($toFile){
		$toFile($text);
		echo $text;
	};

	$toFile($header);
	$mapLink="<a href='".$server_host."LGMap/get_coordinates_for_listed_books.php?file=".$filename.".csv&name=".$keywordUrl."' target='_blank'>view the distribution on the map</a><br><br>";
	$toFile($mapLink);
	// Only the echoed page links to the saved html version.
	echo $mapLink."<a href='".$system_root_url."search_results/".$filename.".html' target='_blank'>html version</a><br><br>";

	echo sizeof($array)." result(s) of \"".$keywordHtml."\" ";
	$toFile(sizeof($array)." result(s) of \"".$keywordHtml."\"<br>");

	$str="<table>";
	$str.="<tr>";
	$str.="<td class='sequence'>#<td class='bookId'>book id<td class='bookName'>book name<td class='level1'>level1<td class='level2'>level2<td class='period'>period<td class='sectionName'>section info<td class='page'>page<td class='content'>content";
	$toBoth($str);

	$i=1;
	foreach($array as $row){
		$str="<tr>";
		$str.="<td>".$i;
		$str.="<td>".$row['BOOK_ID'];
		$str.="<td>".$row['Name'];
		$str.="<td>".$row['LEVEL1'];
		$str.="<td>".$row['LEVEL2'];
		$str.="<td>".$row['PERIOD'];
		$str.="<td>";
		$toBoth($str);

		foreach($row['SECTION'] as $section){
			// TODO: config to current extraction-interface OR Toc OR not at all??
			// redirect to LGServer
			$toBoth("<div class='section'><a href='".$lgserver_url."' target='_blank'>".$section['name']."</a>p".$section['start_page']."-".$section['end_page']."</div>");
		}

		$str="<td>".$row['PAGE'];
		if($tag==$NO_TAG){
			$str.="<td>".$row['CONTENT'];
		}else if($tag==$LOCUST_TEMPLE){
			$str.="<td>".findLocustTempleDescription($row['CONTENT'],$keywordArray,0);
		}
		$toBoth($str);
		$i++;
	}

	echo "</table>";
	$toFile("</table></body></html>");
	if($fp){
		fclose($fp);
	}
}

/**
 * Mark up a text around the searched keywords.
 *
 * Mode 0 ($findUnknownTemple == 0): find the LAST keyword occurrence in $str
 * (the leading group is greedy). The text before it is processed again in
 * mode 0, the keyword is wrapped in <keyword>, and the text after it is
 * processed in mode 1. Text containing no keyword is wrapped in <irrelevant>.
 *
 * Mode 1 (otherwise): find the last "whitespace + 1-5 non-space characters +
 * one of 廟/寺/祠" in $str. The text before it is processed again in mode 1;
 * that temple name and everything after it is wrapped in <irrelevant>.
 * Text without such a pattern is returned unchanged.
 *
 * Keywords are matched literally (regex metacharacters are quoted) and empty
 * keywords are ignored; an empty alternative would match everywhere and make
 * the recursion never terminate.
 *
 * @param string $str               Text to tag.
 * @param array  $keywordArray      Keywords to look for.
 * @param int    $findUnknownTemple 0 for keyword mode, non-zero for the
 *                                  "unknown temple" mode.
 * @return string The tagged text.
 */
function findLocustTempleDescription($str,$keywordArray,$findUnknownTemple){
	if($findUnknownTemple==0){
		$alternatives=array();
		foreach((array)$keywordArray as $keyword){
			$keyword=(string)$keyword;
			if($keyword!==''){
				$alternatives[]=preg_quote($keyword,'/');
			}
		}
		if(count($alternatives)==0){
			// Nothing to look for: same outcome as "no keyword found".
			return "<irrelevant>".$str."</irrelevant>";
		}
		//0=whole,1=text,2=locust temple,3=description
		$keywordPattern="/(.*)(".implode("|",$alternatives).")(.*)/u";
		if(preg_match($keywordPattern,$str,$match)==1){//find string "locust temple"
			$result=findLocustTempleDescription($match[1],$keywordArray,0);
			$result.="<keyword>".$match[2]."</keyword>";
			$result.=findLocustTempleDescription($match[3],$keywordArray,1);
			return $result;
		}
		//no string "locust temple"
		return "<irrelevant>".$str."</irrelevant>";
	}

	//0=whole,1=description,2=unknown temple,3=text
	$templePattern='/(.*\s+)(\S{1,5}[廟寺祠])(.*)/u';
	if(preg_match($templePattern,$str,$match)==1){//find the pattern "locust temple......unknown temple"
		$result=findLocustTempleDescription($match[1],$keywordArray,1);
		$result.="<irrelevant>".$match[2].$match[3]."</irrelevant>";
		return $result;
	}
	//can't find the pattern
	return $str;
}

/**
 * Write the search results to ./csv_files/<fileName>.csv.
 *
 * The first line holds the column names. For every result row:
 *  - Name is prefixed with the period: "(PERIOD) Name"
 *  - Description is "VOLUME ╱ AUTHOR ╱ EDITION"
 *  - SECTION lists each section as "name start-end", joined by " ╱ "
 *    (no trailing separator)
 *
 * Nothing is written when the file cannot be opened.
 *
 * @param array  $array    Result rows as returned by search().
 * @param string $fileName Base name (no extension) of the csv file.
 * @return void
 */
function writeCsvFile($array,$fileName){
	$columnNameArray=['BOOK_ID','LEVEL1','LEVEL2',
        	'Name','PERIOD','TimeSpan:begin','TimeSpan:end','PAGE','SECTION','CONTENT',
        	'Description'];
	$fp=fopen("./csv_files/".$fileName.".csv","w");
	if(!$fp){
		// fopen has already raised a warning; there is nothing to write to.
		return;
	}
	fputcsv($fp,$columnNameArray);
	foreach($array as $row){
		// Seed the record in header order so the csv columns line up.
		$book=array();
		foreach($columnNameArray as $column){
			$book[$column]=isset($row[$column]) ? $row[$column] : '';
		}
		$book['Name']="(".$row['PERIOD'].") ".$row['Name'];

		// NOTE(review): as written the first replacement maps "(" to itself;
		// it possibly was meant to normalise another bracket character - confirm.
		$author=str_replace("(","(",$row['AUTHOR']);
		$author=str_replace(")",") ",$author);
		$book['Description']=$row['VOLUME']." ╱ ".$author." ╱ ".$row['EDITION'];

		// Join with implode so the separator only appears BETWEEN sections.
		$sectionLabels=array();
		foreach($row['SECTION'] as $section){
			$sectionLabels[]=$section['name']." ".$section['start_page']."-".$section['end_page'];
		}
		$book['SECTION']=implode(" ╱ ",$sectionLabels);

		fputcsv($fp,$book);
	}
	fclose($fp);
}

?>