changeset 115:507070df62e6 extractapp

add escape rule for regular expression
author Calvin Yeh <cyeh@mpiwg-berlin.mpg.de>
date Thu, 28 Sep 2017 17:13:39 +0200
parents 7d6a107c37da
children 0ef426b5a1b9
files models/extractapp.php
diffstat 1 files changed, 35 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/models/extractapp.php	Thu Sep 28 14:01:59 2017 +0200
+++ b/models/extractapp.php	Thu Sep 28 17:13:39 2017 +0200
@@ -64,7 +64,7 @@
         $stringInput = $lg_text;
         $stringInput = preg_replace("/ /u", "○", $stringInput);
         $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
-        $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$this->book_id."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);
+        $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"#\">\\1</a>】", $stringInput);
         $lg_text = $stringInput;
 
         $this->lg_text = $lg_text;
@@ -102,12 +102,16 @@
         $book_meta->dynasty = $b['dynasty'];
         $book_meta->start_year = $b['start_year'];
         $book_meta->end_year = $b['end_year'];
+        $book_meta->book_year = $b['book_year'];
+        $book_meta->edition_year = $b['edition_year'];
         $book_meta->line = $b['line'];
         $book_meta->volume = $b['volume'];
         $book_meta->author = $b['author'];
         $book_meta->edition = $b['edition'];
         $book_meta->in_jibengujiku = $b['in_jibengujiku'];
         $book_meta->admin_type = $b['admin_type'];
+        $book_meta->book_year = $b['book_year'];
+        $book_meta->edition_year = $b['edition_year'];
 
         $coordinates = $section_meta['section']['coordinates_books'];
         $book_meta->x = $coordinates['x'];
@@ -147,7 +151,7 @@
         $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput);
         $stringInput = preg_replace("/ /u", "○", $stringInput);
         $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
-        $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$this->book_id."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);
+        $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"#\">\\1</a>】", $stringInput);
         $lg_text = $stringInput;
 
         $this->lg_text = $lg_text;
@@ -161,7 +165,6 @@
         $this->lg_text = $this->GetSectionContent();
     }
 
-
     public function SetInfoFromPreviousPage($_postdata) {
         /**
          * Get information from post data passsed by the previous page, and set variables in $this.
@@ -214,7 +217,6 @@
 
     }
 
-
     private function TaglistSubsetIn($list1, $list2) {    // $l1 is a subset of $l2 or not
         // l1 and l2: array( $row['id'], $row['name'], $row['tag'], $row['color'] )
         $cnt_list1 = count($list1);
@@ -304,6 +306,7 @@
         $this->lg_text = $lg_text;
 
     }
+
     public function StartTagging() {
         /**
          * This is the main method for tagging text. It passes all the information to "views/Extractapp/TaggingText.php" view.
@@ -400,7 +403,10 @@
 
             $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require);
             $require = preg_replace('/&amp;/u', "&", $require);
-            $require = preg_replace('/&nbsp;/u', " ", $require); //avoiding invaild xml format
+            $require = preg_replace('/&nbsp;/u', " ", $require);
+            $require = preg_replace('/<font [^><]*>/u', "", $require);
+            $require = preg_replace('/<\/font>/u', "", $require);
+            $require = preg_replace('/ style="background-color: transparent;"/u', "", $require);
             $require = preg_replace("/○/u", " ", $require);
             $require = preg_replace("/<br>/u", "\n", $require);
             $require = preg_replace("/<br>/u", "\n", $require);
@@ -409,7 +415,6 @@
 
     }
 
-
     public function UpdateInfoResponsedFromLGService($response) {
         /**
          *
@@ -468,6 +473,8 @@
             $text .= "<dynasty>".$book->dynasty."</dynasty>\n";
             $text .= "<start_year>".$book->start_year."</start_year>\n";
             $text .= "<end_year>".$book->end_year."</end_year>\n";
+            $text .= "<book_year>".$book->book_year."</book_year>\n";
+            $text .= "<edition_year>".$book->edition_year."</edition_year>\n";
             $text .= "<line>".$book->line."</line>\n";
             $text .= "<volume>".$book->volume."</volume>\n";
             $text .= "<author>".$book->author."</author>\n";
@@ -540,7 +547,10 @@
 
             $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require);
             $require = preg_replace('/&amp;/u', "&", $require);
-            $require = preg_replace('/&nbsp;/u', " ", $require);   //avoiding invaild xml format
+            $require = preg_replace('/&nbsp;/u', " ", $require);
+            $require = preg_replace('/<font [^><]*>/u', "", $require);
+            $require = preg_replace('/<\/font>/u', "", $require);
+            $require = preg_replace('/ style="background-color: transparent;"/u', "", $require);
             $require = preg_replace("/○/u", " ", $require);
             $require = preg_replace("/<br(.*?)>/u", "\n", $require);
             //$require = preg_replace("/<br>/u", "\n", $require);
@@ -638,6 +648,7 @@
 
         return $filenames;
     }
+
     public function LoadSmartRegex($topic_id) {
 
         // Load regex file based on current topic. Only shows the regex in this topic --
@@ -661,7 +672,6 @@
         return;
     }
 
-
     public function SaveSmartRegex($_postdata) {
         if ($_postdata['text']){
 
@@ -714,7 +724,6 @@
         }
     }
 
-
     private function GetTableArray($_taglistArray, $_topic_tag, $_content) {
 
         $outputTableArray = array();
@@ -725,9 +734,10 @@
             $outputTableArray[0][0][$value[2]] = $value[1];
             $outputTableArray[0][1][$value[2]] = $value[1]."(Title)";
         }
-        $outputTableArray[0]["other"] = "其他";
+        //remove other, full columns
+        //$outputTableArray[0]["other"] = "其他";
         $outputTableArray[0]["page"] = "頁數";
-        $outputTableArray[0]["full"] = "全文";
+        //$outputTableArray[0]["full"] = "全文";
 
         // id, name, tag, color in _taglistArray
         foreach ( $_taglistArray as $tagValue ) {
@@ -743,12 +753,13 @@
         foreach ( $contentLineArray as $value ) {
             $count++;
             $recordString = $value;
-            $otherString = $recordString;
+            //$otherString = $recordString;
             //echo $recordString."<br>\n";
             // find hyper link in pattern with <a>...</a>
             if ( preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $recordString, $matches) ) {
                 $pageNow = $matches[2];
             }
+
             foreach ( $_taglistArray as $tagValue ) {
                 $tag_name = $tagValue[2];   // $tagValue[2] is tag_name
 
@@ -772,15 +783,16 @@
                             }
                         }
                     }
-                    $otherString = preg_replace("/<".$tag_name.">(.*?)<\/".$tag_name.">/u", " ", $otherString);
+                    //$otherString = preg_replace("/<".$tag_name.">(.*?)<\/".$tag_name.">/u", " ", $otherString);
                 }
             }
-            $otherString = preg_replace("/○/u", "", $otherString);
-            $outputTableArray[$count]["other"] = $otherString;
+
+            //$otherString = preg_replace("/○/u", "", $otherString);
+            //$outputTableArray[$count]["other"] = $otherString;
             $outputTableArray[$count]["page"] = $pageNow;
-            $value = preg_replace("/>/u", "&gt;", $value);
-            $value = preg_replace("/</u", "&lt;", $value);
-            $outputTableArray[$count]["full"] = $value;
+            //$value = preg_replace("/>/u", "&gt;", $value);
+            //$value = preg_replace("/</u", "&lt;", $value);
+            //$outputTableArray[$count]["full"] = $value;
         }
 
 
@@ -964,6 +976,9 @@
                                 $this->GetSQLValueString($date, "date"),
                                 $this->GetSQLValueString($id, "int"));
             $result = mysql_query($query);
+            if (!$result) {
+                echo 0;
+            }
 
         }
 
@@ -1165,8 +1180,6 @@
         return $data;
     }
 
-
-
     public function UpdateTagsInTopic($_postdata) {
         $topic_id = $_postdata['topic_id'];
         $tag_ids = json_decode(str_replace('\\', '', $_postdata['ids']));
@@ -1246,7 +1259,6 @@
 
     }
 
-
     private function GetTaglistByTopicId($topic_id) {
         $taglistArray = array();
         // select taglist ids from topic_tag_relation table
@@ -1296,7 +1308,6 @@
         return $topiclistArray;
     }
 
-
     private function GetTopicTag($topic_id) {
         $result = $this->GetTopicById($topic_id);
         $row = mysql_fetch_assoc($result);
@@ -1542,7 +1553,7 @@
             // if the text is from file system
             $stringInput = preg_replace("/ /u", "○", $stringInput);
             $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
-            $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);
+            $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"#\">\\1</a>】", $stringInput);
 
             $this->branch_id = 1;   // testing at local
 
@@ -1561,7 +1572,7 @@
             $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput);
             $stringInput = preg_replace("/ /u", "○", $stringInput);
             $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
-            $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);
+            $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"#\">\\1</a>】", $stringInput);
         }
 
         return $stringInput;