diff develop/models/extractapp.php @ 6:63e08b98032f

rewrite extraction interface into PHP MVC architecture. (Although js hasn't been rewritten into MVC, it's fitted into the current PHP MVC architecture.) - The root of the new PHP MVC is at 'develop/'. - extraction interface is called "Extractapp" with several action, eg TaggingText, EditWordlist, EditTaglist, ExportTable.
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Thu, 05 Feb 2015 16:07:53 +0100
parents
children 584b1623e9ef
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/develop/models/extractapp.php	Thu Feb 05 16:07:53 2015 +0100
@@ -0,0 +1,466 @@
+
+
+<?php
+
+class ExtractappModel extends BaseModel{
+    
+	/**
+	 * Default action: returns a fixed list of placeholder values.
+	 *
+	 * @return array Three hard-coded strings.
+	 */
+	public function Index() {
+		$placeholders = array("Index Value 1", "Value 2", "Value 3");
+		return $placeholders;
+	}
+    
+    // Id of the section being processed; assigned by SetSectionId().
+    protected $section_id;
+    // Not assigned by any method visible in this class (GetDataPath() computes the path on demand).
+    protected $data_path;
+
+
+    /**
+     * Prepares the model for a request by recording the section id from the URL values.
+     *
+     * @param array $_urlvalues URL values; the 'id' entry is read by SetSectionId().
+     * @return void
+     */
+    private function Initialize($_urlvalues) {
+        // The value SetSectionId() may return on error is not propagated here.
+        $this->SetSectionId($_urlvalues);
+    }
+
+    // === for tagging ===
+    /**
+     * Collects everything the tagging view needs for one section.
+     *
+     * @param array $urlvalues URL values; 'id' selects the section.
+     * @return array Keys: 'stringInput', 'taglistArray', 'wordlistArray', 'section_id'.
+     */
+    public function StartTagging($urlvalues) {
+        $this->Initialize($urlvalues);
+
+        // Lookups run in this order: section id, section text, tag list, word lists.
+        $sectionId = $this->GetSectionId();
+        $sectionText = $this->GetSectionContent();
+        $tags = $this->GetTaglistArray();
+        $wordlists = $this->GetWordlistArray();
+
+        return array(
+            'stringInput' => $sectionText,
+            'taglistArray' => $tags,
+            'wordlistArray' => $wordlists,
+            'section_id' => $sectionId,
+        );
+    }
+
+    /**
+     * Saves the edited full text of a section to data/parsing_files/<filename>.txt.
+     *
+     * If the target file already exists, its current content is first copied to
+     * data/parsing_files/<filename>_<timestamp>.txt. The posted HTML is then
+     * converted back to the stored plain-text form: page links are unwrapped,
+     * "&amp;" becomes "&", "○" becomes a space and "<br>" becomes a newline.
+     *
+     * @param array $postdata Expects 'text' (edited HTML) and 'filename' (file name without extension).
+     * @return void
+     */
+    public function SaveFullText($postdata) {
+        // Nothing to save without text or a target name (also avoids undefined-index notices).
+        if (empty($postdata['text']) || !isset($postdata['filename'])) {
+            return;
+        }
+
+        // 'filename' is request data: keep only its last path component so that
+        // values such as "../../x" cannot write outside data/parsing_files/.
+        $name = basename((string) $postdata['filename']);
+        if ($name === '') {
+            return;
+        }
+        $target = "data/parsing_files/".$name.".txt";
+
+        // Keep a timestamped copy of the previous version before overwriting it.
+        if ( file_exists($target) ) {
+            $date = date('Y_m_d_H_i_s', time());
+            $oldFile = file_get_contents($target);
+            file_put_contents("data/parsing_files/".$name."_".$date.".txt", $oldFile);
+        }
+
+        if (get_magic_quotes_gpc()) {
+            $require = stripslashes($postdata['text']);
+        } else {
+            $require = $postdata['text'];
+        }
+
+        // Undo the display-only markup before storing.
+        $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require);
+        $require = preg_replace('/&amp;/u', "&", $require);
+        $require = preg_replace("/○/u", " ", $require);
+        $require = preg_replace("/<br>/u", "\n", $require);
+        file_put_contents($target, $require);
+    }
+
+
+
+    // === for export table ===
+    /**
+     * Builds the export table for one section from the tagged text.
+     *
+     * The text is split into lines at "<br>". For every line, the content of each
+     * known tag (<tag>...</tag>) is collected; several values for the same tag are
+     * joined with ";". When a tagged value contains 〈...〉 groups, only the group
+     * contents are collected. Each row has the shape
+     *   array(0 => array(tag => values), "other" => rest of line, "page" => page, "full" => escaped line)
+     * and row 0 is the header (tag names, tag names with "(Title)", column labels).
+     * Only rows that have a value for the tag "person" are kept; this applies to
+     * the header row as well.
+     *
+     * @param array $urlvalues URL values; 'id' selects the section.
+     * @param array $postdata  Expects 'content': the tagged text with lines separated by "<br>".
+     * @return array Keys: 'outputTableArray', 'bookId', 'section_id', 'bookName', 'sectionName'.
+     */
+    public function ExportTable($urlvalues, $postdata) {
+        $this->Initialize($urlvalues);
+        $content = isset($postdata['content']) ? $postdata['content'] : "";
+
+        $section_id = $this->GetSectionId();
+        $section_info = $this->GetSectionInfo();
+        $sectionName = $section_info['sectionName'];
+        $bookId = $section_info['bookId'];
+
+        $books_info = $this->GetBooksInfo($bookId);
+        $bookName = $books_info['bookName'];
+
+        $taglistArray = $this->GetTaglistArray();
+        if (!is_array($taglistArray)) {
+            // An empty tag list may come back as a non-array; treat it as "no tags".
+            $taglistArray = array();
+        }
+
+        // Header row plus one regex set per tag, built once instead of once per line.
+        // Tag names are quoted so that regex metacharacters in a tag cannot change the pattern.
+        $header = array(0 => array(), 1 => array());
+        $tagRules = array();
+        foreach ( $taglistArray as $tagValue ) {
+            $tag = $tagValue[2];
+            $quoted = preg_quote($tag, "/");
+            $header[0][$tag] = $tagValue[1];
+            $header[1][$tag] = $tagValue[1]."(Title)";
+            $tagRules[] = array(
+                'tag' => $tag,
+                'span' => "/<".$quoted.">(.*?)<\/".$quoted.">/u",
+                'adjacent' => "/<\/".$quoted.">○*<".$quoted.">/u",
+                'empty' => "/<".$quoted.">[ ]*<\/".$quoted.">/u",
+            );
+        }
+        $header["other"] = "其他";
+        $header["page"] = "頁數";
+        $header["full"] = "全文";
+
+        // Merge directly adjacent spans of the same tag and drop empty spans.
+        foreach ( $tagRules as $rule ) {
+            $content = preg_replace($rule['adjacent'], "", $content);
+            $content = preg_replace($rule['empty'], "", $content);
+        }
+
+        $outputTableArray = array();
+        if ( isset($header[0]["person"]) ) {
+            $outputTableArray[0] = $header;
+        }
+
+        $count = 0;
+        $pageNow = NULL; // last page marker seen; carried over to following lines
+        foreach ( explode("<br>", $content) as $line ) {
+            $count++;
+            $otherString = $line;
+            $row = array();
+
+            if ( preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $line, $pageMatch) ) {
+                $pageNow = $pageMatch[2];
+            }
+
+            foreach ( $tagRules as $rule ) {
+                if ( !preg_match_all($rule['span'], $line, $spans, PREG_SET_ORDER) ) {
+                    continue;
+                }
+                $tag = $rule['tag'];
+                foreach ( $spans as $span ) {
+                    $text = preg_replace("/○/u", "", $span[1]);
+
+                    // Prefer the 〈...〉 groups inside the value; otherwise take the whole value.
+                    $pieces = array();
+                    if ( preg_match_all("/〈(.*?)〉/u", $text, $groups, PREG_SET_ORDER) ) {
+                        foreach ( $groups as $group ) {
+                            $pieces[] = $group[1];
+                        }
+                    } else {
+                        $pieces[] = $text;
+                    }
+
+                    foreach ( $pieces as $piece ) {
+                        if ( isset($row[0][$tag]) ) {
+                            $row[0][$tag] .= ";".$piece;
+                        } else {
+                            $row[0][$tag] = $piece;
+                        }
+                    }
+                }
+                // Whatever is not inside a tag ends up in the "other" column.
+                $otherString = preg_replace($rule['span'], " ", $otherString);
+            }
+
+            // Rows without a "person" value are not exported.
+            if ( !isset($row[0]["person"]) ) {
+                continue;
+            }
+
+            $row["other"] = preg_replace("/○/u", "", $otherString);
+            $row["page"] = $pageNow;
+            $escaped = preg_replace("/>/u", "&gt;", $line);
+            $escaped = preg_replace("/</u", "&lt;", $escaped);
+            $row["full"] = $escaped;
+
+            $outputTableArray[$count] = $row;
+        }
+
+        $data = array();
+        $data['outputTableArray'] = $outputTableArray;
+        $data['bookId'] = $bookId;
+        $data['section_id'] = $section_id;
+        $data['bookName'] = $bookName;
+        $data['sectionName'] = $sectionName;
+
+        return $data;
+    }
+    
+    // === for manage tag list ===
+    /**
+     * Loads the tag list for the tag-management view.
+     *
+     * @param array $urlvalues URL values; 'id' selects the section.
+     * @return array Key 'taglistArray': tag id => array(name, tag, color).
+     */
+    public function EditTaglist($urlvalues) {
+        $this->Initialize($urlvalues);
+
+        $tagsById = array();
+        $rows = $this->GetTaglist();
+        while ($entry = mysql_fetch_assoc($rows)) {
+            $tagsById[$entry['id']] = array($entry['name'], $entry['tag'], $entry['color']);
+        }
+
+        return array('taglistArray' => $tagsById);
+    }
+
+    /**
+     * Extracts the tag fields from posted data, removing the slashes added by
+     * magic quotes when that setting is active.
+     *
+     * @param array $_postdata Expects 'id', 'name', 'tag' and 'color'.
+     * @return array array(id, name, tag, color), in that order.
+     */
+    private function _GetTag($_postdata) {
+        $unescape = get_magic_quotes_gpc();
+
+        $fields = array();
+        foreach (array('id', 'name', 'tag', 'color') as $key) {
+            $fields[] = $unescape ? stripslashes($_postdata[$key]) : $_postdata[$key];
+        }
+
+        return $fields;
+    }
+
+    /**
+     * Inserts a new row into `taglist` from posted data.
+     *
+     * @param array $postdata Expects 'id', 'name', 'tag' and 'color'; nothing happens when 'id' is falsy.
+     * @return void
+     */
+    public function NewTagElement($postdata) {
+        if (!$postdata['id']) {
+            return;
+        }
+
+        list($id, $name, $tag, $color) = $this->_GetTag($postdata);
+
+        $sqlId = $this->GetSQLValueString($id, "int");
+        $sqlName = $this->GetSQLValueString($name, "text");
+        $sqlTag = $this->GetSQLValueString($tag, "text");
+        $sqlColor = $this->GetSQLValueString($color, "text");
+        $sqlSystem = $this->GetSQLValueString($this->systemNAME, "text");
+
+        $insert = sprintf("INSERT INTO `taglist` (`id`, `name`, `tag`, `color`, `systemName`) VALUES (%s, %s, %s, %s, %s)",
+                            $sqlId, $sqlName, $sqlTag, $sqlColor, $sqlSystem);
+        mysql_query($insert);
+    }
+
+    /**
+     * Updates name, tag and color of an existing `taglist` row.
+     *
+     * @param array $postdata Expects 'id', 'name', 'tag' and 'color'; nothing happens when 'id' is falsy.
+     * @return void
+     */
+    public function SaveTagElement($postdata) {
+        if (!$postdata['id']) {
+            return;
+        }
+
+        list($id, $name, $tag, $color) = $this->_GetTag($postdata);
+
+        $sqlName = $this->GetSQLValueString($name, "text");
+        $sqlTag = $this->GetSQLValueString($tag, "text");
+        $sqlColor = $this->GetSQLValueString($color, "text");
+        $sqlId = $this->GetSQLValueString($id, "int");
+
+        $update = sprintf("UPDATE taglist SET `name`=%s, `tag`=%s, `color`=%s WHERE `id`=%s",
+                            $sqlName, $sqlTag, $sqlColor, $sqlId);
+        mysql_query($update);
+    }
+
+    /**
+     * Deletes one row from `taglist`.
+     *
+     * @param array $postdata Expects 'id' (tag id); nothing happens when it is missing or falsy.
+     * @return void
+     */
+    public function DeleteTag($postdata) {
+        if (empty($postdata['id'])) {
+            return;
+        }
+
+        // 'id' is request data. Pass it through GetSQLValueString() like the
+        // insert/update methods of this class do, instead of interpolating it raw
+        // into the statement (SQL injection).
+        // NOTE(review): assumes GetSQLValueString(..., "int") yields a safe integer literal - confirm in BaseModel.
+        $id = stripslashes($postdata['id']);
+        $delete = sprintf("DELETE FROM `taglist` WHERE `id` = %s",
+                            $this->GetSQLValueString($id, "int"));
+        mysql_query($delete);
+    }
+    // =========================== 
+
+    // === for manage wordlist ===
+    /**
+     * Loads the word lists for the wordlist-management view.
+     *
+     * @param array $urlvalues URL values; 'id' selects the section.
+     * @return array Key 'wordlistArray': wordlist id => wordlist name.
+     */
+    public function EditWordlist($urlvalues) {
+        $this->Initialize($urlvalues);
+
+        $namesById = array();
+        $rows = $this->GetWordlist();
+        while ($entry = mysql_fetch_assoc($rows)) {
+            $namesById[$entry['id']] = $entry['name'];
+        }
+
+        return array('wordlistArray' => $namesById);
+    }
+
+
+    /**
+     * Creates a new word list: inserts a row into `wordlist` and creates its
+     * file data/wordlist/<new id>.txt with the placeholder content "(empty now)".
+     *
+     * @param array $postdata Expects 'text': the name of the new list; nothing happens when it is missing or falsy.
+     * @return void
+     */
+    public function AddNewList($postdata) {
+        if (empty($postdata['text'])) {
+            return;
+        }
+
+        if (get_magic_quotes_gpc()) {
+            $name = stripslashes($postdata['text']);
+        } else {
+            $name = $postdata['text'];
+        }
+
+        $query1 = sprintf("INSERT INTO `wordlist` (`name`, `systemName`) VALUES (%s, %s)",
+                        $this->GetSQLValueString($name, "text"),
+                        $this->GetSQLValueString($this->systemNAME, "text"));
+
+        // Only create the file when the row was inserted; after a failed insert
+        // mysql_insert_id() does not identify a new list.
+        if (!mysql_query($query1)) {
+            return;
+        }
+        file_put_contents( "data/wordlist/".mysql_insert_id().".txt", "(empty now)");
+    }  
+
+
+    /**
+     * Saves the edited content of a word list to data/wordlist/<filename>.txt.
+     *
+     * If the file already exists, its current content is first copied to
+     * data/wordlist/<filename>_<timestamp>.txt, then the file itself is
+     * overwritten with the new text. This is the same backup-then-overwrite
+     * scheme SaveFullText() uses.
+     *
+     * @param array $postdata Expects 'text' (new content) and 'filename' (file name without extension).
+     * @return void
+     */
+    public function SaveWordlist($postdata) {
+        // Nothing to save without text or a target name (also avoids undefined-index notices).
+        if (empty($postdata['text']) || !isset($postdata['filename'])) {
+            return;
+        }
+
+        // 'filename' is request data: keep only its last path component so that
+        // values such as "../../x" cannot write outside data/wordlist/.
+        $name = basename((string) $postdata['filename']);
+        if ($name === '') {
+            return;
+        }
+        $target = "data/wordlist/".$name.".txt";
+
+        // Back up the previous version. The new text must still go to $target:
+        // writing it to the backup path would leave the live file unchanged.
+        if ( file_exists($target) ) {
+            $date = date('Y_m_d_H_i_s', time());
+            $oldFile = file_get_contents($target);
+            file_put_contents("data/wordlist/".$name."_".$date.".txt", $oldFile);
+        }
+
+        if (get_magic_quotes_gpc()) {
+            $require = stripslashes($postdata['text']);
+        } else {
+            $require = $postdata['text'];
+        }
+
+        // "<br>" markers are stored unchanged.
+        file_put_contents($target, $require);
+    }
+
+
+    // =======================================
+
+    /**
+     * Comparison callback that orders strings by descending byte length.
+     *
+     * @param string $a
+     * @param string $b
+     * @return int Positive when $b is longer than $a, negative when shorter, 0 when equal.
+     */
+    public function sortFunction($a,$b) {
+        $lengthA = strlen($a);
+        $lengthB = strlen($b);
+        return $lengthB - $lengthA;
+    }
+
+
+    /**
+     * Stores the section id taken from the URL values.
+     *
+     * @param array $_urlvalues URL values; 'id' is the section id.
+     * @return string|null A JSON-encoded error message when 'id' is empty, otherwise nothing.
+     */
+    private function SetSectionId($_urlvalues) {
+        // TODO: maybe get user info also
+        if ($_urlvalues['id'] == "") {
+            // No id in the URL: report it and leave $this->section_id untouched.
+            // (Open question kept from the original: whether 'book'/'start'/'end' should be read instead.)
+            return json_encode("Error: No section id");
+        }
+
+        $this->section_id = $_urlvalues['id'];
+    }
+
+    /**
+     * Returns the stored section id.
+     *
+     * @return mixed The section id, or a JSON-encoded error message when none is set.
+     */
+    private function GetSectionId() {
+        if (!$this->section_id) {
+            return json_encode("Error: No section id");
+        }
+        return $this->section_id;
+    }
+    
+    /**
+     * Looks up book id, page range and name of the current section.
+     *
+     * @return array|string Array with keys 'bookId', 'startPage', 'endPage' and
+     *                      'sectionName' (all null when no row is found); when the
+     *                      section id is not numeric, that id value is returned as is.
+     */
+    private function GetSectionInfo() {
+        $section_id = $this->GetSectionId();
+        if (!is_numeric($section_id)){
+            return $section_id;
+        }
+
+        // Defaults keep every key defined when the query fails or returns no row.
+        $data = array(
+            'bookId' => null,
+            'startPage' => null,
+            'endPage' => null,
+            'sectionName' => null,
+        );
+
+        $result = $this->GetSectionsByID($section_id);
+        if ($result) {
+            // If several rows come back, the last one wins.
+            while ($row = mysql_fetch_assoc($result)) {
+                $data['bookId'] = $row['books_id'];
+                $data['startPage'] = $row['start_page'];
+                $data['endPage'] = $row['end_page'];
+                $data['sectionName'] = $row['name'];
+            }
+        }
+
+        return $data;
+    }
+
+    /**
+     * Returns the text of the current section, prepared for the tagging view.
+     *
+     * A previously saved file <data path>/parsing_files/<section id>.txt takes
+     * precedence. Otherwise the text is assembled from the `contents` table for
+     * the section's page range, each page prefixed with 【page】 and every
+     * <...> markup span replaced by "○". In both cases spaces become "○",
+     * newlines become "<br>" and every 【page】 marker is turned into a link.
+     *
+     * @return string HTML fragment; empty string when no section info is available.
+     */
+    private function GetSectionContent() {
+        $section_id = $this->GetSectionId();
+        $section_info = $this->GetSectionInfo();
+        if (!is_array($section_info)) {
+            // GetSectionInfo() hands back a plain value for a non-numeric id; there is nothing to load then.
+            return "";
+        }
+
+        $bookId = $section_info['bookId'];
+        $startPage = $section_info['startPage'];
+        // The end page comes from the section info, not from the id string.
+        $endPage = $section_info['endPage'];
+
+        $savedFile = $this->GetDataPath()."parsing_files/".$section_id.".txt";
+        if ( file_exists($savedFile) ) {
+            $stringInput = file_get_contents($savedFile);
+        } else {
+            $contentString = "";
+            $query = sprintf("SELECT `content`, `line`, `books_id` FROM `contents` WHERE `books_id`=\"%s\" AND `line`>=%d AND `line`<=%d", $bookId, $startPage, $endPage);
+            $result = mysql_query($query);
+            if ($result) {
+                while ($row = mysql_fetch_assoc($result)) {
+                    $contentString.="【".$row['line']."】".$row['content']."\n";
+                }
+            }
+            // Markup in the database text is replaced by the placeholder character.
+            $stringInput = preg_replace("/<(.*?)>/u", "○", $contentString);
+        }
+
+        // Display form shared by both sources.
+        $stringInput = preg_replace("/ /u", "○", $stringInput);
+        $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
+        $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);
+
+        return $stringInput;
+    }
+    
+    /**
+     * Returns the data directory: the current working directory followed by "/data/".
+     *
+     * @return string
+     */
+    private function GetDataPath() {
+        $workingDir = getcwd();
+        return $workingDir."/data/";
+    }
+
+    /**
+     * Returns all tags as a list.
+     *
+     * @return array List of array(id, name, tag, color); empty array when there are no tags.
+     */
+    private function GetTaglistArray() {
+        // Start from an array: appending with [] to a string is a fatal error on
+        // PHP >= 7.1, and callers iterate over the result with foreach.
+        $taglistArray = array();
+
+        $result = $this->GetTaglist();
+        if ($result) {
+            while ($row = mysql_fetch_assoc($result)) {
+                $taglistArray[] = array( $row['id'], $row['name'], $row['tag'], $row['color'] );
+            }
+        }
+
+        return $taglistArray;
+    }
+
+    /**
+     * Returns every word list together with a pattern built from its words.
+     *
+     * For each list the file <id>.txt is read, <div>/<span> markup is removed,
+     * the words (one per line) are ordered longest first, "○?" is inserted
+     * between the characters of each word, and the words are joined with "|".
+     *
+     * @return array List of array(id, name, pattern string); empty array when there are no lists.
+     */
+    private function GetWordlistArray() {
+        // Start from an array: appending with [] to a string is a fatal error on PHP >= 7.1.
+        $wordlistArray = array();
+
+        $result = $this->GetWordlist();
+        if (!$result) {
+            return $wordlistArray;
+        }
+
+        $dataPath = $this->GetDataPath();
+        while ($row = mysql_fetch_assoc($result)) {
+            // AddNewList()/SaveWordlist() write to data/wordlist/, so read from there;
+            // the previous relative location is kept as a fallback.
+            $listFile = $dataPath."wordlist/".$row['id'].".txt";
+            if (!file_exists($listFile)) {
+                $listFile = "wordlist/".$row['id'].".txt";
+            }
+            $listString = file_exists($listFile) ? file_get_contents($listFile) : "";
+            if ($listString === false) {
+                $listString = "";
+            }
+
+            $listString = preg_replace("/<div>/u", "\n", $listString);
+            $listString = preg_replace("/<\/div>/u", "", $listString);
+            $listString = preg_replace("/<span(.*?)>/u", "", $listString);
+            $listString = preg_replace("/<\/span>/u", "", $listString);
+
+            $words = explode( "\n", $listString );
+            // sortFunction is a method of this class, so it has to be passed as an
+            // object callback; the bare name would refer to a global function.
+            usort($words, array($this, 'sortFunction'));
+
+            $patterns = array();
+            foreach ( $words as $word ) {
+                if ($word == "") {
+                    continue; // blank lines contribute nothing
+                }
+                // Split into single characters and allow an optional "○" between them.
+                $patterns[] = implode("○?", preg_split("/(?<!^)(?!$)/u", $word));
+            }
+            $listString = implode("|", $patterns);
+
+            if ( $listString !== "" && $listString[0]=="|" ) $listString = substr($listString, 1);
+            $wordlistArray[] = array( $row['id'], $row['name'], $listString );
+        }
+
+        return $wordlistArray;
+    }
+
+    /**
+     * Looks up the name belonging to a book id.
+     *
+     * @param mixed $bookId Book id, as returned in GetSectionInfo()['bookId'].
+     * @return array Key 'bookName': the name found, or null when no row is returned.
+     */
+    protected function GetBooksInfo($bookId) {
+        // Default keeps the key defined when the query fails or returns no row.
+        $bookName = null;
+
+        // NOTE(review): this queries through GetSectionsByID() with a book id - confirm
+        // that this helper is the intended lookup for book names.
+        $result = $this->GetSectionsByID($bookId);
+        if ($result) {
+            while ($row = mysql_fetch_assoc($result)) {
+                $bookName = $row['name'];
+            }
+        }
+
+        $data = array();
+        $data['bookName'] = $bookName;
+
+        return $data;
+    }
+
+    
+}
+
+
+
+?>