Mercurial > hg > extraction-interface
diff develop/models/extractapp.php @ 6:63e08b98032f
rewrite extraction interface into PHP MVC architecture.
(Although js hasn't been rewritten into MVC, it's fitted into the current PHP MVC architecture.)
- The root of the new PHP MVC is at 'develop/'.
- The extraction interface is called "Extractapp" and has several actions, e.g. TaggingText, EditWordlist, EditTaglist, ExportTable.
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 05 Feb 2015 16:07:53 +0100 |
parents | |
children | 584b1623e9ef |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/develop/models/extractapp.php Thu Feb 05 16:07:53 2015 +0100 @@ -0,0 +1,466 @@
<?php

/**
 * Model behind the "Extractapp" actions: tagging a section's text, exporting
 * the tagged text as a table, and editing the tag list and the word lists.
 *
 * The helpers GetTaglist(), GetWordlist(), GetSectionsByID(),
 * GetSQLValueString() and the $systemNAME property are not defined in this
 * file; they are expected to be inherited from BaseModel.
 */
class ExtractappModel extends BaseModel {

    protected $section_id, $data_path;

    /**
     * Placeholder action.
     *
     * @return array Three fixed demo strings.
     */
    public function Index() {
        return array("Index Value 1", "Value 2", "Value 3");
    }

    /**
     * Common per-request setup: remembers the section id found in the URL values.
     *
     * @param array $_urlvalues URL values; the key 'id' is read.
     */
    private function Initialize($_urlvalues) {
        $this->SetSectionId($_urlvalues);
    }

    /**
     * Returns $_postdata[$key] with magic-quote slashes removed when magic
     * quotes are active, or "" when the key is absent.
     *
     * @param array  $_postdata Posted values.
     * @param string $key       Key to read.
     * @return string
     */
    private function GetPostValue($_postdata, $key) {
        if (!isset($_postdata[$key])) {
            return "";
        }
        $value = $_postdata[$key];
        // get_magic_quotes_gpc() no longer exists on recent PHP versions.
        if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
            $value = stripslashes($value);
        }
        return $value;
    }

    // === for tagging ===

    /**
     * Collects everything the tagging view needs.
     *
     * @param array $urlvalues URL values; 'id' is the section id.
     * @return array Keys: 'stringInput', 'taglistArray', 'wordlistArray', 'section_id'.
     */
    public function StartTagging($urlvalues) {
        $this->Initialize($urlvalues);

        $section_id = $this->GetSectionId();
        $stringInput = $this->GetSectionContent();
        $taglistArray = $this->GetTaglistArray();
        $wordlistArray = $this->GetWordlistArray();

        return array(
            'stringInput' => $stringInput,
            'taglistArray' => $taglistArray,
            'wordlistArray' => $wordlistArray,
            'section_id' => $section_id,
        );
    }

    /**
     * Saves the edited full text to data/parsing_files/<filename>.txt, after
     * copying any existing file to a timestamped backup next to it.
     *
     * Nothing is written when 'text' is empty or 'filename' is missing.
     *
     * @param array $postdata Posted values; 'text' and 'filename' are read.
     */
    public function SaveFullText($postdata) {
        if (empty($postdata['text']) || !isset($postdata['filename'])) {
            return;
        }

        // basename() keeps a posted name such as "../../x" inside the data directory.
        $name = basename($postdata['filename']);
        if ($name === "") {
            return;
        }

        $target = "data/parsing_files/".$name.".txt";
        if (file_exists($target)) {
            $date = date('Y_m_d_H_i_s', time());
            $oldFile = file_get_contents($target);
            file_put_contents("data/parsing_files/".$name."_".$date.".txt", $oldFile);
        }

        $require = $this->GetPostValue($postdata, 'text');

        // Turn the page-number links back into plain 【page】 markers.
        $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require);
        // NOTE(review): as written this replacement is a no-op; the pattern was
        // presumably the HTML entity for "&" before this page was exported - confirm
        // against the repository.
        $require = preg_replace('/&/u', "&", $require);
        $require = preg_replace("/○/u", " ", $require);
        $require = preg_replace("/<br>/u", "\n", $require);
        // NOTE(review): exact duplicate of the line above; one of the two was
        // presumably matching the entity-escaped form of <br> - confirm.
        $require = preg_replace("/<br>/u", "\n", $require);

        file_put_contents($target, $require);
    }

    // === for export table ===

    /**
     * Builds the export table from the tagged HTML content of a section.
     *
     * Row 0 is the header (tag => name, tag => name."(Title)", plus the fixed
     * "other"/"page"/"full" columns). Each later row corresponds to one
     * <br>-separated line of the content. Rows without a "person" cell in
     * their [0] entry are dropped at the end.
     *
     * @param array $urlvalues URL values; 'id' is the section id.
     * @param array $postdata  Posted values; 'content' is the tagged text.
     * @return array Keys: 'outputTableArray', 'bookId', 'section_id', 'bookName', 'sectionName'.
     */
    public function ExportTable($urlvalues, $postdata) {
        $this->Initialize($urlvalues);
        $content = isset($postdata['content']) ? $postdata['content'] : "";

        $section_id = $this->GetSectionId();
        $section_info = $this->GetSectionInfo();

        // GetSectionInfo() hands back an error string instead of an array when
        // the section id is unusable.
        $sectionName = is_array($section_info) ? $section_info['sectionName'] : null;
        $bookId = is_array($section_info) ? $section_info['bookId'] : null;

        $books_info = $this->GetBooksInfo($bookId);
        $bookName = $books_info['bookName'];

        $taglistArray = $this->GetTaglistArray();

        // Header row.
        $outputTableArray = array();
        $outputTableArray[0] = array(0 => array(), 1 => array());
        foreach ($taglistArray as $value) {
            $outputTableArray[0][0][$value[2]] = $value[1];
            $outputTableArray[0][1][$value[2]] = $value[1]."(Title)";
        }
        $outputTableArray[0]["other"] = "其他";
        $outputTableArray[0]["page"] = "頁數";
        $outputTableArray[0]["full"] = "全文";

        // Merge adjacent runs of the same tag and drop tags wrapping only spaces.
        foreach ($taglistArray as $tagValue) {
            $tag = preg_quote($tagValue[2], '/');
            $content = preg_replace("/<\/".$tag.">○*<".$tag.">/u", "", $content);
            $content = preg_replace("/<".$tag.">[ ]*<\/".$tag.">/u", "", $content);
        }

        $contentLineArray = explode("<br>", $content);

        $count = 0;
        $pageNow = NULL;
        foreach ($contentLineArray as $value) {
            $count++;
            $recordString = $value;
            $otherString = $recordString;

            // A page marker on this line becomes the current page for this and
            // all following lines until the next marker.
            if (preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $recordString, $matches)) {
                $pageNow = $matches[2];
            }

            foreach ($taglistArray as $tagValue) {
                $tagKey = $tagValue[2];
                $tagPattern = "/<".preg_quote($tagKey, '/').">(.*?)<\/".preg_quote($tagKey, '/').">/u";

                if (!preg_match_all($tagPattern, $recordString, $matches, PREG_SET_ORDER)) {
                    continue;
                }

                foreach ($matches as $matchesValue) {
                    $tagged = preg_replace("/○/u", "", $matchesValue[1]);

                    // Text in 〈…〉 inside a tag is exported piece by piece;
                    // otherwise the whole tagged text is one cell value.
                    $cells = array();
                    if (preg_match_all("/〈(.*?)〉/u", $tagged, $matches2, PREG_SET_ORDER)) {
                        foreach ($matches2 as $matches2Value) {
                            $cells[] = $matches2Value[1];
                        }
                    } else {
                        $cells[] = $tagged;
                    }

                    foreach ($cells as $cell) {
                        if (isset($outputTableArray[$count][0][$tagKey])) {
                            $outputTableArray[$count][0][$tagKey] .= ";".$cell;
                        } else {
                            $outputTableArray[$count][0][$tagKey] = $cell;
                        }
                    }
                }

                // Whatever is left after removing the tagged parts is "other".
                $otherString = preg_replace($tagPattern, " ", $otherString);
            }

            $otherString = preg_replace("/○/u", "", $otherString);
            $outputTableArray[$count]["other"] = $otherString;
            $outputTableArray[$count]["page"] = $pageNow;
            // NOTE(review): both replacements below are no-ops as written; the
            // patterns were presumably the HTML entities for ">" and "<" before
            // this page was exported - confirm against the repository.
            $value = preg_replace("/>/u", ">", $value);
            $value = preg_replace("/</u", "<", $value);
            $outputTableArray[$count]["full"] = $value;
        }

        // Keep only rows that carry a "person" cell (this also decides whether
        // the header row survives).
        foreach ($outputTableArray as $arrayIndex => $arrayValue) {
            if (!isset($arrayValue[0]["person"])) {
                unset($outputTableArray[$arrayIndex]);
            }
        }

        return array(
            'outputTableArray' => $outputTableArray,
            'bookId' => $bookId,
            'section_id' => $section_id,
            'bookName' => $bookName,
            'sectionName' => $sectionName,
        );
    }

    // === for manage tag list ===

    /**
     * Loads the tag list for the tag editor.
     *
     * @param array $urlvalues URL values; 'id' is the section id.
     * @return array Key 'taglistArray': id => array(name, tag, color).
     */
    public function EditTaglist($urlvalues) {
        $this->Initialize($urlvalues);
        $result = $this->GetTaglist();

        $taglistArray = array();
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $taglistArray[$row['id']] = array($row['name'], $row['tag'], $row['color']);
            }
        }

        return array('taglistArray' => $taglistArray);
    }

    /**
     * Extracts the tag fields from posted data.
     *
     * @param array $_postdata Posted values; 'id', 'name', 'tag', 'color' are read.
     * @return array array($id, $name, $tag, $color)
     */
    private function _GetTag($_postdata) {
        return array(
            $this->GetPostValue($_postdata, 'id'),
            $this->GetPostValue($_postdata, 'name'),
            $this->GetPostValue($_postdata, 'tag'),
            $this->GetPostValue($_postdata, 'color'),
        );
    }

    /**
     * Inserts a new row into `taglist`. Does nothing when 'id' is empty.
     *
     * @param array $postdata Posted values; 'id', 'name', 'tag', 'color' are read.
     */
    public function NewTagElement($postdata) {
        if (empty($postdata['id'])) {
            return;
        }
        list($id, $name, $tag, $color) = $this->_GetTag($postdata);

        $query1 = sprintf("INSERT INTO `taglist` (`id`, `name`, `tag`, `color`, `systemName`) VALUES (%s, %s, %s, %s, %s)",
            $this->GetSQLValueString($id, "int"),
            $this->GetSQLValueString($name, "text"),
            $this->GetSQLValueString($tag, "text"),
            $this->GetSQLValueString($color, "text"),
            $this->GetSQLValueString($this->systemNAME, "text"));
        mysql_query($query1);
    }

    /**
     * Updates name, tag and color of an existing `taglist` row.
     * Does nothing when 'id' is empty.
     *
     * @param array $postdata Posted values; 'id', 'name', 'tag', 'color' are read.
     */
    public function SaveTagElement($postdata) {
        if (empty($postdata['id'])) {
            return;
        }
        list($id, $name, $tag, $color) = $this->_GetTag($postdata);

        $queryUpdate = sprintf("UPDATE taglist SET `name`=%s, `tag`=%s, `color`=%s WHERE `id`=%s",
            $this->GetSQLValueString($name, "text"),
            $this->GetSQLValueString($tag, "text"),
            $this->GetSQLValueString($color, "text"),
            $this->GetSQLValueString($id, "int"));
        mysql_query($queryUpdate);
    }

    /**
     * Deletes a `taglist` row by id. Does nothing when 'id' is empty.
     *
     * @param array $postdata Posted values; 'id' is read.
     */
    public function DeleteTag($postdata) {
        if (empty($postdata['id'])) {
            return;
        }
        // The id goes through GetSQLValueString() like in the sibling methods
        // instead of being pasted into the statement verbatim.
        $queryDelete = sprintf("DELETE FROM `taglist` WHERE `id` = %s",
            $this->GetSQLValueString($this->GetPostValue($postdata, 'id'), "int"));
        mysql_query($queryDelete);
    }
    // ===========================

    // === for manage wordlist ===

    /**
     * Loads the word lists for the word list editor.
     *
     * @param array $urlvalues URL values; 'id' is the section id.
     * @return array Key 'wordlistArray': id => name.
     */
    public function EditWordlist($urlvalues) {
        $this->Initialize($urlvalues);
        $result = $this->GetWordlist();

        $wordlistArray = array();
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $wordlistArray[$row['id']] = $row['name'];
            }
        }

        return array('wordlistArray' => $wordlistArray);
    }

    /**
     * Registers a new word list in `wordlist` and creates its placeholder
     * file data/wordlist/<new id>.txt. Does nothing when 'text' is empty.
     *
     * @param array $postdata Posted values; 'text' is the list name.
     */
    public function AddNewList($postdata) {
        if (empty($postdata['text'])) {
            return;
        }
        $name = $this->GetPostValue($postdata, 'text');

        $query1 = sprintf("INSERT INTO `wordlist` (`name`, `systemName`) VALUES (%s, %s)",
            $this->GetSQLValueString($name, "text"),
            $this->GetSQLValueString($this->systemNAME, "text"));
        $result1 = mysql_query($query1);

        // Without a successful insert there is no id to name the file after.
        if ($result1) {
            file_put_contents("data/wordlist/".mysql_insert_id().".txt", "(empty now)");
        }
    }

    /**
     * Saves a word list to data/wordlist/<filename>.txt, after copying any
     * existing file to a timestamped backup next to it.
     *
     * Nothing is written when 'text' is empty or 'filename' is missing.
     *
     * @param array $postdata Posted values; 'text' and 'filename' are read.
     */
    public function SaveWordlist($postdata) {
        if (empty($postdata['text']) || !isset($postdata['filename'])) {
            return;
        }

        // basename() keeps a posted name such as "../../x" inside the data directory.
        $name = basename($postdata['filename']);
        if ($name === "") {
            return;
        }

        $filename = "data/wordlist/".$name.".txt";
        if (file_exists($filename)) {
            $date = date('Y_m_d_H_i_s', time());
            $oldFile = file_get_contents($filename);
            file_put_contents("data/wordlist/".$name."_".$date.".txt", $oldFile);
        }

        $require = $this->GetPostValue($postdata, 'text');

        // NOTE(review): as written this replacement is a no-op; the pattern was
        // presumably the entity-escaped form of <br> before this page was
        // exported - confirm against the repository.
        $require = preg_replace("/<br>/u", "<br>", $require);

        // The new text always goes to the live file (the one the tagging view
        // reads), never to the backup copy.
        file_put_contents($filename, $require);
    }

    // =======================================

    /**
     * usort() comparator: longer strings first (by byte length).
     *
     * @param string $a
     * @param string $b
     * @return int Negative when $a is longer than $b.
     */
    public function sortFunction($a, $b) {
        return strlen($b) - strlen($a);
    }

    /**
     * Stores the section id taken from the URL values.
     *
     * @param array $_urlvalues URL values; 'id' is read.
     * @return string|null JSON-encoded error text when no id was given, otherwise nothing.
     */
    private function SetSectionId($_urlvalues) {
        // TODO: maybe get user info also
        if (!isset($_urlvalues['id']) || $_urlvalues['id'] == "") {
            return json_encode("Error: No section id");
        }
        $this->section_id = $_urlvalues['id'];
    }

    /**
     * @return mixed The stored section id, or a JSON-encoded error string when none is set.
     */
    private function GetSectionId() {
        if ($this->section_id) {
            return $this->section_id;
        }
        return json_encode("Error: No section id");
    }

    /**
     * Looks up the section row for the current section id.
     *
     * @return array|string Keys 'bookId', 'startPage', 'endPage', 'sectionName'
     *                      (null when no row was found); or the non-numeric
     *                      value GetSectionId() returned, passed through unchanged.
     */
    private function GetSectionInfo() {
        $section_id = $this->GetSectionId();
        if (!is_numeric($section_id)) {
            return $section_id;
        }

        $data = array(
            'bookId' => null,
            'startPage' => null,
            'endPage' => null,
            'sectionName' => null,
        );

        $result = $this->GetSectionsByID($section_id);
        if ($result) {
            // If several rows come back, the last one wins.
            while ($row = mysql_fetch_assoc($result)) {
                $data['bookId'] = $row['books_id'];
                $data['startPage'] = $row['start_page'];
                $data['endPage'] = $row['end_page'];
                $data['sectionName'] = $row['name'];
            }
        }

        return $data;
    }

    /**
     * Returns the section text prepared for the tagging editor: spaces become
     * "○", newlines become "<br>" and 【page】 markers become links.
     *
     * The text comes from <data path>/parsing_files/<section id>.txt when that
     * file exists; otherwise it is assembled from the `contents` table for the
     * section's page range, with anything in angle brackets replaced by "○".
     *
     * @return string
     */
    private function GetSectionContent() {
        $section_id = $this->GetSectionId();
        $section_info = $this->GetSectionInfo();

        $hasInfo = is_array($section_info);
        $bookId = $hasInfo ? $section_info['bookId'] : null;
        $startPage = $hasInfo ? $section_info['startPage'] : null;
        $endPage = $hasInfo ? $section_info['endPage'] : null;

        $savedFile = $this->GetDataPath()."parsing_files/".$section_id.".txt";
        if (file_exists($savedFile)) {
            $stringInput = file_get_contents($savedFile);
        } else {
            $contentString = "";
            $query = sprintf("SELECT `content`, `line`, `books_id` FROM `contents` WHERE `books_id`=\"%s\" AND `line`>=%d AND `line`<=%d", $bookId, $startPage, $endPage);
            $result = mysql_query($query);
            if ($result) {
                while ($row = mysql_fetch_assoc($result)) {
                    $contentString .= "【".$row['line']."】".$row['content']."\n";
                }
            }
            $stringInput = preg_replace("/<(.*?)>/u", "○", $contentString);
        }

        $stringInput = preg_replace("/ /u", "○", $stringInput);
        $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
        $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);

        return $stringInput;
    }

    /**
     * @return string Current working directory followed by "/data/".
     */
    private function GetDataPath() {
        return getcwd()."/data/";
    }

    /**
     * @return array List of array(id, name, tag, color); empty when there are no tags.
     */
    private function GetTaglistArray() {
        $taglistArray = array();
        $result = $this->GetTaglist();
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $taglistArray[] = array($row['id'], $row['name'], $row['tag'], $row['color']);
            }
        }
        return $taglistArray;
    }

    /**
     * Builds, for every word list, a regex alternation of its words.
     *
     * Words are ordered longest first, and "○?" is inserted between the
     * characters of each word so a word still matches when "○" (the editor's
     * stand-in for a space) sits between its characters.
     *
     * @return array List of array(id, name, alternation string); empty when there are no lists.
     */
    private function GetWordlistArray() {
        $wordlistArray = array();
        $result = $this->GetWordlist();
        if (!$result) {
            return $wordlistArray;
        }

        while ($row = mysql_fetch_assoc($result)) {
            // Word list files are written below data/wordlist/; fall back to the
            // path relative to the working directory that was used here before.
            $listFile = $this->GetDataPath()."wordlist/".$row['id'].".txt";
            if (!file_exists($listFile)) {
                $listFile = "wordlist/".$row['id'].".txt";
            }
            $listString = file_exists($listFile) ? file_get_contents($listFile) : "";

            // Strip the editor's markup; each <div> starts a new word.
            $listString = preg_replace("/<div>/u", "\n", $listString);
            $listString = preg_replace("/<\/div>/u", "", $listString);
            $listString = preg_replace("/<span(.*?)>/u", "", $listString);
            $listString = preg_replace("/<\/span>/u", "", $listString);

            $wordlistArray2 = explode("\n", $listString);
            usort($wordlistArray2, array($this, 'sortFunction'));
            foreach ($wordlistArray2 as $index => $value) {
                $wordlistArray2[$index] = implode("○?", preg_split("/(?<!^)(?!$)/u", $value));
            }
            foreach ($wordlistArray2 as $index => $value) {
                if ($value == "") {
                    unset($wordlistArray2[$index]);
                }
            }
            $listString = implode("|", $wordlistArray2);

            if ($listString !== "" && $listString[0] == "|") {
                $listString = substr($listString, 1);
            }
            $wordlistArray[] = array($row['id'], $row['name'], $listString);
        }

        return $wordlistArray;
    }

    /**
     * Looks up the name belonging to a book id.
     *
     * NOTE(review): this queries through GetSectionsByID() with a book id; a
     * book lookup helper was presumably intended - confirm against BaseModel.
     *
     * @param mixed $bookId
     * @return array Key 'bookName' (null when no row was found).
     */
    protected function GetBooksInfo($bookId) {
        $bookName = null;
        $result = $this->GetSectionsByID($bookId);
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $bookName = $row['name'];
            }
        }

        return array('bookName' => $bookName);
    }

}