Mercurial > hg > extraction-interface
view develop/models/extractapp.php @ 6:63e08b98032f
rewrite extraction interface into PHP MVC architecture.
(Although js hasn't been rewritten into MVC, it's fitted into the current PHP MVC architecture.)
- The root of the new PHP MVC is at 'develop/'.
- The extraction interface is called "Extractapp" and has several actions, e.g. TaggingText, EditWordlist, EditTaglist, ExportTable.
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 05 Feb 2015 16:07:53 +0100 |
parents | |
children | 584b1623e9ef |
line wrap: on
line source
<?php
/**
 * Model behind the "Extractapp" actions: tagging a section's text, exporting
 * the tagged text as a table, and managing the tag list and word lists.
 *
 * Relies on members that are not declared in this class and are therefore
 * expected to come from BaseModel: GetTaglist(), GetWordlist(),
 * GetSectionsByID(), GetSQLValueString() and the $systemNAME property.
 * The legacy mysql_* extension is used, so an open connection is assumed.
 */
class ExtractappModel extends BaseModel
{
    protected $section_id, $data_path;

    /**
     * Placeholder action.
     *
     * @return array Three fixed strings.
     */
    public function Index()
    {
        return array("Index Value 1", "Value 2", "Value 3");
    }

    /**
     * Common per-request setup: remember the section id found in the URL values.
     *
     * @param array $_urlvalues URL values; the 'id' key is read.
     */
    private function Initialize($_urlvalues)
    {
        $this->SetSectionId($_urlvalues);
    }

    // ------------------------------------------------------------------
    // Shared private helpers
    // ------------------------------------------------------------------

    /**
     * Undo magic-quotes escaping when the runtime applies it.
     *
     * @param string $value Raw request value.
     * @return string Value without the added slashes.
     */
    private function _StripMagicQuotes($value)
    {
        // The function no longer exists on recent PHP versions, hence the guard.
        if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
            return stripslashes($value);
        }
        return $value;
    }

    /**
     * Read one request field, un-escaped; null when the field is absent.
     *
     * @param array  $postdata Request data.
     * @param string $key      Field name.
     * @return string|null
     */
    private function _PostValue($postdata, $key)
    {
        return isset($postdata[$key]) ? $this->_StripMagicQuotes($postdata[$key]) : null;
    }

    /**
     * Collect every row of a mysql result; a failed query yields an empty list.
     *
     * @param resource|bool $result Return value of mysql_query() or a BaseModel getter.
     * @return array List of associative rows.
     */
    private function _FetchRows($result)
    {
        $rows = array();
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $rows[] = $row;
            }
        }
        return $rows;
    }

    /**
     * Reduce a client-supplied name to its last path component so it cannot
     * point outside the intended data directory.
     *
     * @param mixed $name Client-supplied file stem.
     * @return string
     */
    private function _SafeFileStem($name)
    {
        return basename((string) $name);
    }

    /**
     * Write "<stem>.txt" in $directory, first copying an existing file to
     * "<stem>_<Y_m_d_H_i_s>.txt" so the previous version is kept.
     *
     * @param string $directory Directory path ending in "/".
     * @param string $stem      File name without extension.
     * @param string $contents  New file contents.
     */
    private function _SaveWithBackup($directory, $stem, $contents)
    {
        $target = $directory.$stem.".txt";
        if (file_exists($target)) {
            $backup = $directory.$stem."_".date('Y_m_d_H_i_s').".txt";
            file_put_contents($backup, file_get_contents($target));
        }
        file_put_contents($target, $contents);
    }

    // ------------------------------------------------------------------
    // Tagging
    // ------------------------------------------------------------------

    /**
     * Gather everything the tagging view needs for the section named in the URL.
     *
     * @param array $urlvalues URL values; 'id' is the section id.
     * @return array Keys: stringInput, taglistArray, wordlistArray, section_id.
     */
    public function StartTagging($urlvalues)
    {
        $this->Initialize($urlvalues);

        $data = array();
        $data['stringInput'] = $this->GetSectionContent();
        $data['taglistArray'] = $this->GetTaglistArray();
        $data['wordlistArray'] = $this->GetWordlistArray();
        $data['section_id'] = $this->GetSectionId();
        return $data;
    }

    /**
     * Store the edited full text of a section under data/parsing_files/,
     * keeping a timestamped copy of the previous version.
     *
     * @param array $postdata Reads 'text' (markup from the editor) and 'filename'.
     */
    public function SaveFullText($postdata)
    {
        if (empty($postdata['text'])) {
            return;
        }

        $require = $this->_StripMagicQuotes($postdata['text']);
        // Turn the page-marker links back into plain 【page】 markers.
        $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require);
        // NOTE(review): as written this replacement changes nothing; the pattern
        // was presumably an HTML entity before the file was rendered - confirm.
        $require = preg_replace('/&/u', "&", $require);
        $require = preg_replace("/○/u", " ", $require);
        // NOTE(review): the same replacement appears twice; one of them was
        // presumably an entity-encoded <br> - confirm against the repository.
        $require = preg_replace("/<br>/u", "\n", $require);
        $require = preg_replace("/<br>/u", "\n", $require);

        $stem = $this->_SafeFileStem(isset($postdata['filename']) ? $postdata['filename'] : '');
        $this->_SaveWithBackup($this->GetDataPath()."parsing_files/", $stem, $require);
    }

    // ------------------------------------------------------------------
    // Export table
    // ------------------------------------------------------------------

    /**
     * Convert tagged text into a table: one row per <br>-separated line, one
     * column per tag, plus the untagged remainder, the page and the full line.
     *
     * Row 0 is the header. Rows (header included) that carry no "person" cell
     * are dropped before returning.
     *
     * @param array $urlvalues URL values; 'id' is the section id.
     * @param array $postdata  Reads 'content' (the tagged markup).
     * @return array Keys: outputTableArray, bookId, section_id, bookName, sectionName.
     */
    public function ExportTable($urlvalues, $postdata)
    {
        $this->Initialize($urlvalues);
        $content = isset($postdata['content']) ? $postdata['content'] : '';

        $section_id = $this->GetSectionId();
        $section_info = $this->GetSectionInfo();
        $sectionName = $section_info['sectionName'];
        $bookId = $section_info['bookId'];
        $books_info = $this->GetBooksInfo($bookId);
        $bookName = $books_info['bookName'];

        $taglistArray = $this->GetTaglistArray();

        // Header row: tag => display name, and tag => display name + "(Title)".
        $outputTableArray = array();
        $outputTableArray[0] = array(0 => array(), 1 => array());
        foreach ($taglistArray as $tagValue) {
            $outputTableArray[0][0][$tagValue[2]] = $tagValue[1];
            $outputTableArray[0][1][$tagValue[2]] = $tagValue[1]."(Title)";
        }
        $outputTableArray[0]["other"] = "其他";
        $outputTableArray[0]["page"] = "頁數";
        $outputTableArray[0]["full"] = "全文";

        // Merge adjacent runs of the same tag and drop tags that are empty.
        foreach ($taglistArray as $tagValue) {
            $tag = preg_quote($tagValue[2], '/');
            $content = preg_replace("/<\/".$tag.">○*<".$tag.">/u", "", $content);
            $content = preg_replace("/<".$tag.">[ ]*<\/".$tag.">/u", "", $content);
        }

        $count = 0;
        $pageNow = NULL;
        foreach (explode("<br>", $content) as $value) {
            $count++;
            $otherString = $value;

            // A page marker applies to this line and all following ones.
            if (preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $value, $pageMatch)) {
                $pageNow = $pageMatch[2];
            }

            $cells = array();
            foreach ($taglistArray as $tagValue) {
                $tagName = $tagValue[2];
                $tag = preg_quote($tagName, '/');
                $tagPattern = "/<".$tag.">(.*?)<\/".$tag.">/u";

                if (!preg_match_all($tagPattern, $value, $tagMatches, PREG_SET_ORDER)) {
                    continue;
                }
                foreach ($tagMatches as $tagMatch) {
                    $text = preg_replace("/○/u", "", $tagMatch[1]);
                    // 〈...〉 groups inside a tag each become a separate value;
                    // otherwise the whole tagged text is the value.
                    $values = array($text);
                    if (preg_match_all("/〈(.*?)〉/u", $text, $inner, PREG_SET_ORDER)) {
                        $values = array();
                        foreach ($inner as $innerMatch) {
                            $values[] = $innerMatch[1];
                        }
                    }
                    foreach ($values as $single) {
                        $cells[$tagName] = isset($cells[$tagName]) ? $cells[$tagName].";".$single : $single;
                    }
                }
                $otherString = preg_replace($tagPattern, " ", $otherString);
            }

            if ($cells) {
                $outputTableArray[$count][0] = $cells;
            }
            $outputTableArray[$count]["other"] = preg_replace("/○/u", "", $otherString);
            $outputTableArray[$count]["page"] = $pageNow;
            // NOTE(review): both replacements are no-ops as written; presumably
            // they converted to or from HTML entities - confirm.
            $value = preg_replace("/>/u", ">", $value);
            $value = preg_replace("/</u", "<", $value);
            $outputTableArray[$count]["full"] = $value;
        }

        // Only rows that contain a "person" cell are exported.
        foreach ($outputTableArray as $arrayIndex => $arrayValue) {
            if (!isset($arrayValue[0]["person"])) {
                unset($outputTableArray[$arrayIndex]);
            }
        }

        $data = array();
        $data['outputTableArray'] = $outputTableArray;
        $data['bookId'] = $bookId;
        $data['section_id'] = $section_id;
        $data['bookName'] = $bookName;
        $data['sectionName'] = $sectionName;
        return $data;
    }

    // ------------------------------------------------------------------
    // Tag list management
    // ------------------------------------------------------------------

    /**
     * Load the tag list for the editing view.
     *
     * @param array $urlvalues URL values; 'id' is the section id.
     * @return array Key taglistArray: id => array(name, tag, color).
     */
    public function EditTaglist($urlvalues)
    {
        $this->Initialize($urlvalues);

        $taglistArray = array();
        foreach ($this->_FetchRows($this->GetTaglist()) as $row) {
            $taglistArray[$row['id']] = array($row['name'], $row['tag'], $row['color']);
        }
        return array('taglistArray' => $taglistArray);
    }

    /**
     * Extract the tag fields from request data, un-escaped.
     *
     * @param array $_postdata Reads 'id', 'name', 'tag', 'color'.
     * @return array array(id, name, tag, color).
     */
    private function _GetTag($_postdata)
    {
        return array(
            $this->_PostValue($_postdata, 'id'),
            $this->_PostValue($_postdata, 'name'),
            $this->_PostValue($_postdata, 'tag'),
            $this->_PostValue($_postdata, 'color'),
        );
    }

    /**
     * Insert a new tag row.
     *
     * @param array $postdata Reads 'id', 'name', 'tag', 'color'; nothing happens without 'id'.
     */
    public function NewTagElement($postdata)
    {
        if (empty($postdata['id'])) {
            return;
        }
        list($id, $name, $tag, $color) = $this->_GetTag($postdata);
        $query = sprintf("INSERT INTO `taglist` (`id`, `name`, `tag`, `color`, `systemName`) VALUES (%s, %s, %s, %s, %s)",
            $this->GetSQLValueString($id, "int"),
            $this->GetSQLValueString($name, "text"),
            $this->GetSQLValueString($tag, "text"),
            $this->GetSQLValueString($color, "text"),
            $this->GetSQLValueString($this->systemNAME, "text"));
        mysql_query($query);
    }

    /**
     * Update name, tag and color of an existing tag row.
     *
     * @param array $postdata Reads 'id', 'name', 'tag', 'color'; nothing happens without 'id'.
     */
    public function SaveTagElement($postdata)
    {
        if (empty($postdata['id'])) {
            return;
        }
        list($id, $name, $tag, $color) = $this->_GetTag($postdata);
        $query = sprintf("UPDATE taglist SET `name`=%s, `tag`=%s, `color`=%s WHERE `id`=%s",
            $this->GetSQLValueString($name, "text"),
            $this->GetSQLValueString($tag, "text"),
            $this->GetSQLValueString($color, "text"),
            $this->GetSQLValueString($id, "int"));
        mysql_query($query);
    }

    /**
     * Delete a tag row.
     *
     * @param array $postdata Reads 'id'; nothing happens without it.
     */
    public function DeleteTag($postdata)
    {
        if (empty($postdata['id'])) {
            return;
        }
        // The id comes straight from the request, so it is passed through the
        // same value formatter as in the other tag queries instead of being
        // pasted into the statement.
        $query = sprintf("DELETE FROM `taglist` WHERE `id` = %s",
            $this->GetSQLValueString($this->_PostValue($postdata, 'id'), "int"));
        mysql_query($query);
    }

    // ------------------------------------------------------------------
    // Word list management
    // ------------------------------------------------------------------

    /**
     * Load the word lists for the editing view.
     *
     * @param array $urlvalues URL values; 'id' is the section id.
     * @return array Key wordlistArray: id => name.
     */
    public function EditWordlist($urlvalues)
    {
        $this->Initialize($urlvalues);

        $wordlistArray = array();
        foreach ($this->_FetchRows($this->GetWordlist()) as $row) {
            $wordlistArray[$row['id']] = $row['name'];
        }
        return array('wordlistArray' => $wordlistArray);
    }

    /**
     * Register a new word list and create its (placeholder) file
     * data/wordlist/<new id>.txt.
     *
     * @param array $postdata Reads 'text' (the list name); nothing happens without it.
     */
    public function AddNewList($postdata)
    {
        if (empty($postdata['text'])) {
            return;
        }
        $name = $this->_StripMagicQuotes($postdata['text']);
        $query = sprintf("INSERT INTO `wordlist` (`name`, `systemName`) VALUES (%s, %s)",
            $this->GetSQLValueString($name, "text"),
            $this->GetSQLValueString($this->systemNAME, "text"));
        // Without a successful insert there is no id to name the file after.
        if (mysql_query($query)) {
            file_put_contents($this->GetDataPath()."wordlist/".mysql_insert_id().".txt", "(empty now)");
        }
    }

    /**
     * Store the edited contents of a word list under data/wordlist/, keeping a
     * timestamped copy of the previous version.
     *
     * The new text is written to "<filename>.txt" itself (the file that
     * GetWordlistArray() reads); the dated file holds the previous contents.
     *
     * @param array $postdata Reads 'text' and 'filename'.
     */
    public function SaveWordlist($postdata)
    {
        if (empty($postdata['text'])) {
            return;
        }
        $require = $this->_StripMagicQuotes($postdata['text']);
        // NOTE(review): no-op as written; presumably decoded an entity-encoded
        // <br> before the file was rendered - confirm.
        $require = preg_replace("/<br>/u", "<br>", $require);

        $stem = $this->_SafeFileStem(isset($postdata['filename']) ? $postdata['filename'] : '');
        $this->_SaveWithBackup($this->GetDataPath()."wordlist/", $stem, $require);
    }

    /**
     * Comparator ordering strings from longest to shortest (byte length).
     *
     * @param string $a
     * @param string $b
     * @return int Negative when $a is longer than $b.
     */
    public function sortFunction($a, $b)
    {
        return strlen($b) - strlen($a);
    }

    // ------------------------------------------------------------------
    // Section helpers
    // ------------------------------------------------------------------

    /**
     * Remember the section id given in the URL values.
     *
     * @param array $_urlvalues URL values; 'id' is read.
     * @return string|null JSON-encoded error text when no id is given, otherwise null.
     */
    private function SetSectionId($_urlvalues)
    {
        // TODO: maybe get user info also
        if (isset($_urlvalues['id']) && $_urlvalues['id'] != "") {
            $this->section_id = $_urlvalues['id'];
            return null;
        }
        return json_encode("Error: No section id");
    }

    /**
     * @return mixed The stored section id, or a JSON-encoded error text when none is set.
     */
    private function GetSectionId()
    {
        if ($this->section_id) {
            return $this->section_id;
        }
        return json_encode("Error: No section id");
    }

    /**
     * Look up book id, page range and name of the current section.
     *
     * @return array Keys bookId, startPage, endPage, sectionName; all null when
     *               the section id is not numeric or no row is found.
     */
    private function GetSectionInfo()
    {
        $data = array(
            'bookId' => null,
            'startPage' => null,
            'endPage' => null,
            'sectionName' => null,
        );

        $section_id = $this->GetSectionId();
        if (!is_numeric($section_id)) {
            // Covers the error text returned by GetSectionId().
            return $data;
        }

        // If several rows come back, the last one wins.
        foreach ($this->_FetchRows($this->GetSectionsByID($section_id)) as $row) {
            $data['bookId'] = $row['books_id'];
            $data['startPage'] = $row['start_page'];
            $data['endPage'] = $row['end_page'];
            $data['sectionName'] = $row['name'];
        }
        return $data;
    }

    /**
     * Build the markup shown in the tagging editor for the current section.
     *
     * A previously saved file data/parsing_files/<section id>.txt takes
     * precedence; otherwise the text is assembled from the `contents` table
     * for the section's page range, with existing markup replaced by "○".
     * In both cases spaces become "○", newlines become "<br>" and 【page】
     * markers become links to the page image viewer.
     *
     * @return string
     */
    private function GetSectionContent()
    {
        $section_id = $this->GetSectionId();
        $section_info = $this->GetSectionInfo();
        $bookId = $section_info['bookId'];
        $startPage = $section_info['startPage'];
        $endPage = $section_info['endPage'];

        $savedFile = $this->GetDataPath()."parsing_files/".$this->_SafeFileStem($section_id).".txt";
        if (file_exists($savedFile)) {
            $stringInput = file_get_contents($savedFile);
        } else {
            $query = sprintf("SELECT `content`, `line`, `books_id` FROM `contents` WHERE `books_id`=\"%s\" AND `line`>=%d AND `line`<=%d", $bookId, $startPage, $endPage);
            $contentString = "";
            foreach ($this->_FetchRows(mysql_query($query)) as $row) {
                $contentString .= "【".$row['line']."】".$row['content']."\n";
            }
            // Markup stored in the database is not carried into the editor.
            $stringInput = preg_replace("/<(.*?)>/u", "○", $contentString);
        }

        $stringInput = preg_replace("/ /u", "○", $stringInput);
        $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
        $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);
        return $stringInput;
    }

    /**
     * @return string Absolute path of the data directory below the current
     *                working directory, with trailing slash.
     */
    private function GetDataPath()
    {
        return getcwd()."/data/";
    }

    /**
     * @return array List of array(id, name, tag, color); empty when there are no tags.
     */
    private function GetTaglistArray()
    {
        $taglistArray = array();
        foreach ($this->_FetchRows($this->GetTaglist()) as $row) {
            $taglistArray[] = array($row['id'], $row['name'], $row['tag'], $row['color']);
        }
        return $taglistArray;
    }

    /**
     * Load every word list and turn its words into one regex alternation.
     *
     * Each word has "○?" inserted between its characters (so it still matches
     * when the text has a filler character in between); words are ordered
     * longest first and joined with "|".
     *
     * @return array List of array(id, name, pattern); empty when there are no lists.
     */
    private function GetWordlistArray()
    {
        $wordlistArray = array();
        $directory = $this->GetDataPath()."wordlist/";

        foreach ($this->_FetchRows($this->GetWordlist()) as $row) {
            $path = $directory.$row['id'].".txt";
            $listString = is_readable($path) ? file_get_contents($path) : "";

            // The file may contain editor markup: one <div> per line, plus spans.
            $listString = preg_replace("/<div>/u", "\n", $listString);
            $listString = preg_replace("/<\/div>/u", "", $listString);
            $listString = preg_replace("/<span(.*?)>/u", "", $listString);
            $listString = preg_replace("/<\/span>/u", "", $listString);

            $words = explode("\n", $listString);
            usort($words, array($this, 'sortFunction'));

            $patterns = array();
            foreach ($words as $word) {
                // Split between every pair of characters (not at start or end).
                $characters = preg_split("/(?<!^)(?!$)/u", $word);
                if ($characters === false) {
                    continue; // not valid UTF-8
                }
                $pattern = implode("○?", $characters);
                if ($pattern == "") {
                    continue;
                }
                $patterns[] = $pattern;
            }

            $listString = implode("|", $patterns);
            if ($listString != "" && $listString[0] == "|") {
                $listString = substr($listString, 1);
            }
            $wordlistArray[] = array($row['id'], $row['name'], $listString);
        }
        return $wordlistArray;
    }

    /**
     * Look up the name belonging to a book id.
     *
     * NOTE(review): this queries through GetSectionsByID() although a book id
     * is passed in - confirm that this is the intended lookup.
     *
     * @param mixed $bookId
     * @return array Key bookName; null when no row is found.
     */
    protected function GetBooksInfo($bookId)
    {
        $bookName = null;
        foreach ($this->_FetchRows($this->GetSectionsByID($bookId)) as $row) {
            $bookName = $row['name'];
        }
        return array('bookName' => $bookName);
    }
}