view develop/models/extractapp.php @ 6:63e08b98032f

Rewrite the extraction interface into a PHP MVC architecture. (Although the JS hasn't been rewritten into MVC, it's fitted into the current PHP MVC architecture.) - The root of the new PHP MVC is at 'develop/'. - The extraction interface is called "Extractapp" and has several actions, e.g. TaggingText, EditWordlist, EditTaglist, ExportTable.
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Thu, 05 Feb 2015 16:07:53 +0100
parents
children 584b1623e9ef
line wrap: on
line source



<?php

/**
 * Model behind the "Extractapp" actions: tagging a section's text, exporting
 * tagged text as a table, and managing the tag list and the word lists.
 *
 * Database access goes through the legacy mysql_* API and through helpers
 * that are not defined in this class (GetTaglist, GetWordlist,
 * GetSectionsByID, GetSQLValueString, systemNAME) -- presumably inherited
 * from BaseModel.
 */
class ExtractappModel extends BaseModel{

    /**
     * $section_id is set by SetSectionId() from the URL values.
     * $data_path is declared but not used inside this class.
     */
    protected $section_id, $data_path;

    /**
     * Placeholder action returning three fixed strings.
     *
     * @return array
     */
    public function Index() {
        return array("Index Value 1", "Value 2", "Value 3");
    }

    /**
     * Common setup for the actions that work on one section.
     *
     * @param array $_urlvalues URL values; 'id' carries the section id.
     */
    private function Initialize($_urlvalues) {
        $this->SetSectionId($_urlvalues);
    }

    /**
     * Strips magic-quotes slashes from a request value when that legacy
     * feature is active; otherwise returns the value untouched.
     *
     * @param mixed $value
     * @return mixed
     */
    private function _UnslashInput($value) {
        if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
            return stripslashes($value);
        }
        return $value;
    }

    /**
     * Reduces a client-supplied name to a bare file stem so it cannot point
     * outside the intended directory (no directory separators, no NUL bytes).
     *
     * @param mixed $name
     * @return string
     */
    private function _SanitizeFileStem($name) {
        $flattened = str_replace(array("\0", "\\"), array("", "/"), (string) $name);
        return basename($flattened);
    }

    /**
     * Copies $path to $backupPath when $path exists and can be read.
     *
     * @param string $path
     * @param string $backupPath
     */
    private function _BackupFile($path, $backupPath) {
        if (!file_exists($path)) {
            return;
        }
        $previous = file_get_contents($path);
        if ($previous !== false) {
            file_put_contents($backupPath, $previous);
        }
    }

    // === for tagging ===

    /**
     * Collects everything the tagging view needs for one section.
     *
     * @param array $urlvalues URL values; 'id' carries the section id.
     * @return array keys: stringInput, taglistArray, wordlistArray, section_id
     */
    public function StartTagging($urlvalues) {
        $this->Initialize($urlvalues);

        return array(
            'stringInput'   => $this->GetSectionContent(),
            'taglistArray'  => $this->GetTaglistArray(),
            'wordlistArray' => $this->GetWordlistArray(),
            'section_id'    => $this->GetSectionId(),
        );
    }

    /**
     * Stores the edited text as data/parsing_files/<filename>.txt, keeping a
     * timestamped copy of the previous version.
     *
     * Before writing, the browser markup is undone: page-marker links become
     * plain markers, "&amp;" becomes "&", "○" becomes a space and "<br>"
     * becomes a newline.
     *
     * @param array $postdata expects 'text' and 'filename'; nothing is
     *                        written when either is missing or empty.
     */
    public function SaveFullText($postdata) {
        if (empty($postdata['text']) || !isset($postdata['filename'])) {
            return;
        }
        $stem = $this->_SanitizeFileStem($postdata['filename']);
        if ($stem === "") {
            return;
        }

        $target = "data/parsing_files/".$stem.".txt";
        $this->_BackupFile($target, "data/parsing_files/".$stem."_".date('Y_m_d_H_i_s').".txt");

        $text = $this->_UnslashInput($postdata['text']);

        // preg_replace() yields NULL on malformed UTF-8; writing that would
        // wipe the file, so fall back to the text as received.
        $unlinked = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $text);
        if ($unlinked !== null) {
            $text = $unlinked;
        }

        // Literal substitutions, applied in this order.
        $text = str_replace(array('&amp;', "○", "<br>"), array("&", " ", "\n"), $text);

        file_put_contents($target, $text);
    }

    // === for export table ===

    /**
     * Turns tagged text into a table: one row per "<br>"-separated line, one
     * cell per tag, plus the untagged remainder, the current page marker and
     * the escaped full line.
     *
     * Row 0 is the header row. Rows that have no "person" cell are dropped
     * at the end (this applies to row 0 as well when no tag is named
     * "person").
     *
     * @param array $urlvalues URL values; 'id' carries the section id.
     * @param array $postdata  'content' holds the tagged text.
     * @return array keys: outputTableArray, bookId, section_id, bookName, sectionName
     */
    public function ExportTable($urlvalues, $postdata) {
        $this->Initialize($urlvalues);
        $content = isset($postdata['content']) ? $postdata['content'] : "";

        $section_id = $this->GetSectionId();

        // GetSectionInfo() returns a non-array error value for unusable ids.
        $section_info = $this->GetSectionInfo();
        $hasInfo = is_array($section_info);
        $sectionName = $hasInfo ? $section_info['sectionName'] : null;
        $bookId = $hasInfo ? $section_info['bookId'] : null;

        $books_info = $this->GetBooksInfo($bookId);
        $bookName = $books_info['bookName'];

        $taglistArray = $this->GetTaglistArray();

        // Header row, plus one "<tag>...</tag>" pattern per tag. Tag names
        // come from the database, so they are quoted before entering a regex.
        $outputTableArray = array();
        $outputTableArray[0] = array(0 => array(), 1 => array());
        $tagPatterns = array();
        foreach ( $taglistArray as $tagIndex => $tagRow ) {
            $outputTableArray[0][0][$tagRow[2]] = $tagRow[1];
            $outputTableArray[0][1][$tagRow[2]] = $tagRow[1]."(Title)";

            $quoted = preg_quote($tagRow[2], '/');
            $tagPatterns[$tagIndex] = "/<".$quoted.">(.*?)<\/".$quoted.">/u";

            // Merge adjacent runs of the same tag and drop blank tags.
            $content = preg_replace("/<\/".$quoted.">○*<".$quoted.">/u", "", $content);
            $content = preg_replace("/<".$quoted.">[ ]*<\/".$quoted.">/u", "", $content);
        }
        $outputTableArray[0]["other"] = "其他";
        $outputTableArray[0]["page"] = "頁數";
        $outputTableArray[0]["full"] = "全文";

        $contentLineArray = explode( "<br>", $content );

        $count = 0;
        $pageNow = NULL;
        foreach ( $contentLineArray as $line ) {
            $count++;
            $otherString = $line;

            // A page marker applies to its own line and every following line
            // until the next marker.
            if ( preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $line, $pageMatch) ) {
                $pageNow = $pageMatch[2];
            }

            foreach ( $taglistArray as $tagIndex => $tagRow ) {
                $tagName = $tagRow[2];
                $tagPattern = $tagPatterns[$tagIndex];
                if ( !preg_match_all($tagPattern, $line, $tagMatches, PREG_SET_ORDER) ) {
                    continue;
                }

                foreach ( $tagMatches as $tagMatch ) {
                    $tagged = str_replace("○", "", $tagMatch[1]);

                    // Text in 〈...〉 inside a tag is exported piece by piece;
                    // without such brackets the whole tagged text is one value.
                    $cellValues = array();
                    if ( preg_match_all("/〈(.*?)〉/u", $tagged, $titleMatches, PREG_SET_ORDER) ) {
                        foreach ( $titleMatches as $titleMatch ) {
                            $cellValues[] = $titleMatch[1];
                        }
                    } else {
                        $cellValues[] = $tagged;
                    }

                    // Several values for the same tag on one line are joined with ";".
                    foreach ( $cellValues as $cellValue ) {
                        if ( isset($outputTableArray[$count][0][$tagName]) ) {
                            $outputTableArray[$count][0][$tagName] .= ";".$cellValue;
                        } else {
                            $outputTableArray[$count][0][$tagName] = $cellValue;
                        }
                    }
                }

                // Whatever is left after removing tagged spans is the "other" text.
                $otherString = preg_replace($tagPattern, " ", $otherString);
            }

            $outputTableArray[$count]["other"] = str_replace("○", "", $otherString);
            $outputTableArray[$count]["page"] = $pageNow;
            $outputTableArray[$count]["full"] = str_replace(array(">", "<"), array("&gt;", "&lt;"), $line);
        }

        foreach ( $outputTableArray as $rowIndex => $row ) {
            if ( !isset($row[0]["person"]) ) {
                unset($outputTableArray[$rowIndex]);
            }
        }

        return array(
            'outputTableArray' => $outputTableArray,
            'bookId'           => $bookId,
            'section_id'       => $section_id,
            'bookName'         => $bookName,
            'sectionName'      => $sectionName,
        );
    }

    // === for manage tag list ===

    /**
     * Loads the tag list for the tag editor.
     *
     * @param array $urlvalues URL values; 'id' carries the section id.
     * @return array 'taglistArray' => array(id => array(name, tag, color))
     */
    public function EditTaglist($urlvalues) {
        $this->Initialize($urlvalues);

        $taglistArray = array();
        $result = $this->GetTaglist();
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $taglistArray[$row['id']] = array($row['name'], $row['tag'], $row['color']);
            }
        }

        return array('taglistArray' => $taglistArray);
    }

    /**
     * Pulls the tag fields out of posted data, undoing magic quotes.
     *
     * @param array $_postdata
     * @return array array(id, name, tag, color); a missing field is null
     */
    private function _GetTag($_postdata) {
        $fields = array();
        foreach (array('id', 'name', 'tag', 'color') as $key) {
            $fields[] = isset($_postdata[$key]) ? $this->_UnslashInput($_postdata[$key]) : null;
        }
        return $fields;
    }

    /**
     * Inserts a new tag row. Does nothing when 'id' is missing or empty.
     *
     * @param array $postdata expects id, name, tag, color
     */
    public function NewTagElement($postdata) {
        if (empty($postdata['id'])) {
            return;
        }
        list($id, $name, $tag, $color) = $this->_GetTag($postdata);

        $query = sprintf("INSERT INTO `taglist` (`id`, `name`, `tag`, `color`, `systemName`) VALUES (%s, %s, %s, %s, %s)",
                            $this->GetSQLValueString($id, "int"),
                            $this->GetSQLValueString($name, "text"),
                            $this->GetSQLValueString($tag, "text"),
                            $this->GetSQLValueString($color, "text"),
                            $this->GetSQLValueString($this->systemNAME, "text"));
        mysql_query($query);
    }

    /**
     * Updates name, tag and color of an existing tag row. Does nothing when
     * 'id' is missing or empty.
     *
     * @param array $postdata expects id, name, tag, color
     */
    public function SaveTagElement($postdata) {
        if (empty($postdata['id'])) {
            return;
        }
        list($id, $name, $tag, $color) = $this->_GetTag($postdata);

        $query = sprintf("UPDATE taglist SET `name`=%s, `tag`=%s, `color`=%s WHERE `id`=%s",
                            $this->GetSQLValueString($name, "text"),
                            $this->GetSQLValueString($tag, "text"),
                            $this->GetSQLValueString($color, "text"),
                            $this->GetSQLValueString($id, "int"));
        mysql_query($query);
    }

    /**
     * Deletes a tag row. Does nothing when 'id' is missing or empty.
     *
     * The id is passed through GetSQLValueString(..., "int"), like in the
     * insert/update methods, instead of being placed into the SQL verbatim.
     *
     * @param array $postdata expects id
     */
    public function DeleteTag($postdata) {
        if (empty($postdata['id'])) {
            return;
        }
        $query = sprintf("DELETE FROM `taglist` WHERE `id` = %s",
                            $this->GetSQLValueString($this->_UnslashInput($postdata['id']), "int"));
        mysql_query($query);
    }
    // ===========================

    // === for manage wordlist ===

    /**
     * Loads the word-list names for the word-list editor.
     *
     * @param array $urlvalues URL values; 'id' carries the section id.
     * @return array 'wordlistArray' => array(id => name)
     */
    public function EditWordlist($urlvalues) {
        $this->Initialize($urlvalues);

        $wordlistArray = array();
        $result = $this->GetWordlist();
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $wordlistArray[$row['id']] = $row['name'];
            }
        }

        return array('wordlistArray' => $wordlistArray);
    }

    /**
     * Registers a new word list and creates its placeholder file
     * data/wordlist/<new id>.txt.
     *
     * The file is created only when the INSERT succeeded and produced an id,
     * so a failed insert no longer leaves a stray "0.txt".
     *
     * @param array $postdata 'text' is the name of the new list
     */
    public function AddNewList($postdata) {
        if (empty($postdata['text'])) {
            return;
        }
        $name = $this->_UnslashInput($postdata['text']);

        $query = sprintf("INSERT INTO `wordlist` (`name`, `systemName`) VALUES (%s, %s)",
                        $this->GetSQLValueString($name, "text"),
                        $this->GetSQLValueString($this->systemNAME, "text"));
        if (!mysql_query($query)) {
            return;
        }

        $newId = mysql_insert_id();
        if ($newId) {
            file_put_contents( "data/wordlist/".$newId.".txt", "(empty now)");
        }
    }

    /**
     * Stores a word list as data/wordlist/<filename>.txt, keeping a
     * timestamped copy of the previous version.
     *
     * The new text always goes to the live file; the timestamped file only
     * ever receives the previous content.
     *
     * @param array $postdata expects 'text' and 'filename'; nothing is
     *                        written when either is missing or empty.
     */
    public function SaveWordlist($postdata) {
        if (empty($postdata['text']) || !isset($postdata['filename'])) {
            return;
        }
        $stem = $this->_SanitizeFileStem($postdata['filename']);
        if ($stem === "") {
            return;
        }

        $target = "data/wordlist/".$stem.".txt";
        $this->_BackupFile($target, "data/wordlist/".$stem."_".date('Y_m_d_H_i_s').".txt");

        file_put_contents($target, $this->_UnslashInput($postdata['text']));
    }

    // =======================================

    /**
     * usort() comparator: puts strings with a larger strlen() (byte length)
     * first.
     *
     * @param string $a
     * @param string $b
     * @return int
     */
    public function sortFunction($a,$b) {
        return strlen($b)-strlen($a);
    }

    /**
     * Remembers the section id taken from the URL values.
     *
     * @param array $_urlvalues
     * @return string|null a JSON-encoded error message when no id was given
     *                     (Initialize() does not look at it), otherwise nothing
     */
    private function SetSectionId($_urlvalues) {
        // TODO: maybe get user info also
        if (!isset($_urlvalues['id']) || $_urlvalues['id'] == "") {
            return json_encode("Error: No section id");
        }
        $this->section_id = $_urlvalues['id'];
    }

    /**
     * @return mixed the stored section id, or a JSON-encoded error message
     *               when none is stored
     */
    private function GetSectionId() {
        if ($this->section_id) {
            return $this->section_id;
        }
        return json_encode("Error: No section id");
    }

    /**
     * Looks up the section row for the current section id.
     *
     * @return array|string array with bookId, startPage, endPage and
     *                      sectionName (each null when no row was found);
     *                      for a non-numeric id the id/error value itself is
     *                      returned unchanged
     */
    private function GetSectionInfo() {
        $section_id = $this->GetSectionId();
        if (!is_numeric($section_id)){
            return $section_id;
        }

        $data = array(
            'bookId'      => null,
            'startPage'   => null,
            'endPage'     => null,
            'sectionName' => null,
        );

        $result = $this->GetSectionsByID($section_id);
        if ($result) {
            // If several rows come back, the last one wins.
            while ($row = mysql_fetch_assoc($result)) {
                $data['bookId'] = $row['books_id'];
                $data['startPage'] = $row['start_page'];
                $data['endPage'] = $row['end_page'];
                $data['sectionName'] = $row['name'];
            }
        }

        return $data;
    }

    /**
     * Builds the HTML-ish text shown in the tagging editor.
     *
     * A previously saved file <data path>parsing_files/<section id>.txt is
     * preferred; otherwise the text is assembled from the `contents` table
     * for the section's page range, with any "<...>" markup replaced by "○".
     * In both cases spaces become "○", newlines become "<br>" and each
     * 【page】 marker is turned into a link to the page image.
     *
     * @return string
     */
    private function GetSectionContent() {
        $section_id = $this->GetSectionId();
        $section_info = $this->GetSectionInfo();

        // GetSectionInfo() returns a non-array error value for unusable ids.
        $hasInfo = is_array($section_info);
        $bookId = $hasInfo ? $section_info['bookId'] : null;
        $startPage = $hasInfo ? $section_info['startPage'] : null;
        $endPage = $hasInfo ? $section_info['endPage'] : null;

        $contentString = "";
        // The id comes from the URL; keep it from escaping the data directory.
        $savedFile = $this->GetDataPath()."parsing_files/".$this->_SanitizeFileStem($section_id).".txt";
        if ( file_exists($savedFile) ) {
            $contentString = file_get_contents($savedFile);
        } else {
            if ($hasInfo) {
                $query = sprintf("SELECT `content`, `line`, `books_id` FROM `contents` WHERE `books_id`=\"%s\" AND `line`>=%d AND `line`<=%d", $bookId, $startPage, $endPage);
                $result = mysql_query($query);
                if ($result) {
                    while ($row = mysql_fetch_assoc($result)) {
                        $contentString .= "【".$row['line']."】".$row['content']."\n";
                    }
                }
            }
            $contentString = preg_replace("/<(.*?)>/u", "○", $contentString);
        }

        $stringInput = preg_replace("/ /u", "○", $contentString);
        $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
        $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);

        return $stringInput;
    }

    /**
     * @return string "<current working directory>/data/"
     */
    private function GetDataPath() {
        return getcwd()."/data/";
    }

    /**
     * @return array list of array(id, name, tag, color); empty when there
     *               are no tags
     */
    private function GetTaglistArray() {
        $taglistArray = array();
        $result = $this->GetTaglist();
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $taglistArray[] = array( $row['id'], $row['name'], $row['tag'], $row['color'] );
            }
        }
        return $taglistArray;
    }

    /**
     * Loads every word list and converts it into an alternation string.
     *
     * For each list the file <data path>wordlist/<id>.txt is read, its
     * div/span markup is removed, the words are ordered with sortFunction(),
     * "○?" is inserted between the characters of each word, and the words
     * are joined with "|".
     *
     * @return array list of array(id, name, alternation string); empty when
     *               there are no word lists
     */
    private function GetWordlistArray() {
        $wordlistArray = array();
        $result = $this->GetWordlist();
        if (!$result) {
            return $wordlistArray;
        }

        $wordlistDir = $this->GetDataPath()."wordlist/";
        while ($row = mysql_fetch_assoc($result)) {
            // Same directory AddNewList()/SaveWordlist() write to.
            $listFile = $wordlistDir.$row['id'].".txt";
            $listString = is_readable($listFile) ? file_get_contents($listFile) : "";
            if ($listString === false) {
                $listString = "";
            }

            $listString = preg_replace("/<div>/u", "\n", $listString);
            $listString = preg_replace("/<\/div>/u", "", $listString);
            $listString = preg_replace("/<span(.*?)>/u", "", $listString);
            $listString = preg_replace("/<\/span>/u", "", $listString);

            $words = explode( "\n", $listString );
            // The comparator is a method of this class, not a global function.
            usort($words, array($this, 'sortFunction'));

            foreach ( $words as $index => $word ) {
                // Split between characters (UTF-8 aware) and rejoin with "○?".
                $characters = preg_split("/(?<!^)(?!$)/u", $word);
                if ($characters !== false) {
                    $words[$index] = implode("○?", $characters);
                }
            }
            foreach ( $words as $index => $word ) {
                if ($word == "") unset($words[$index]);
            }
            $listString = implode("|", $words);

            if ( $listString !== "" && $listString[0] == "|" ) {
                $listString = substr($listString, 1);
            }
            $wordlistArray[] = array( $row['id'], $row['name'], $listString );
        }

        return $wordlistArray;
    }

    /**
     * Looks up the name belonging to a book id.
     *
     * NOTE(review): this queries GetSectionsByID() with a *book* id, so the
     * name returned is whatever that helper finds for that id -- confirm
     * whether a book-specific lookup was intended.
     *
     * @param mixed $bookId
     * @return array 'bookName' => name of the last row found, or null
     */
    protected function GetBooksInfo($bookId) {
        $bookName = null;
        $result = $this->GetSectionsByID($bookId);
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $bookName = $row['name'];
            }
        }

        return array('bookName' => $bookName);
    }

}



?>