changeset 39:8347776a44fc extractapp

embedded topic, taglist, book metadata in file
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Sun, 08 Mar 2015 22:28:21 +0100
parents 4b3da4802998
children 2e938dc046db
files develop/config/config.php develop/data/parsing_files/1.txt develop/models/extractapp.php develop/views/Extractapp/TaggingText.php
diffstat 4 files changed, 125 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/develop/config/config.php	Fri Mar 06 17:56:48 2015 +0100
+++ b/develop/config/config.php	Sun Mar 08 22:28:21 2015 +0100
@@ -2,11 +2,12 @@
 
 // localhost
 $systemNAME = "interface";
-$mysql_database = "Gazetteer";
+$mysql_database = "Gazetteers";
 $mysql_server = "localhost";
 $mysql_user = "root";
-$mysql_password = "admin";
+$mysql_password = "root";
 
-$system_root_url = "http://localgazetteers-dev/extraction-interface/develop";
+$system_root_url = "http://localhost:1080/extraction-interface/develop";
+
 
 ?>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/develop/data/parsing_files/1.txt	Sun Mar 08 22:28:21 2015 +0100
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<text>
+  <topic>2</topic>
+  <book category="localgazetteer">
+    <title>南陵縣志</title>
+    <author>qwer</author>
+    <year>1999</year>
+    <pagenumber>333</pagenumber>
+  </book>
+
+  <taglist>
+    <id>2</id>
+    <name>人名</name>
+    <tag>person</tag>
+    <color>rgb(8, 148, 255)</color>
+  </taglist>
+  <taglist>
+    <id>29</id>
+    <name>入仕時間</name>
+    <tag>entry_time</tag>
+    <color>rgb(174, 179, 39)</color>
+  </taglist>
+  <taglist>
+    <id>31</id>
+    <name>物產名稱</name>
+    <tag>product_name</tag>
+    <color>rgb(227, 63, 227)</color>
+  </taglist>
+
+  <text_content>
+  
+  【102】    契本工課鈔肆貫     
+物産    按淳属嚴郡爲
+〈古揚州之境〉而其物産之見於經    者曰陽鳥攸居而已曰篠簜既敷而已曰厥草惟    夭厥木惟喬而已固今日諸郡縣之所同而非止    嚴陵属縣爲然也謹擇其有資於民生服食噐用    
+者載焉 
+<shu>〈糓〉</shu>〈糓〉      秔
+<shu>〈糓〉</shu>有
+<shu>〈糓〉</shu><product_name>早稻</product_name>
+<shu>〈糓〉</shu><product_name>晚稻</product_name>
+<shu>〈糓〉</shu><product_name>紅稻</product_name>
+<shu>〈糓〉</shu><product_name>白稻</product_name>      
+
+<shu>〈糯〉</shu>〈糯〉有
+<shu>〈糯〉</shu><product_name>秋糯</product_name>
+<shu>〈糯〉</shu><product_name>晩糯</product_name>  
+
+
+  </text_content>
+</text>
\ No newline at end of file
--- a/develop/models/extractapp.php	Fri Mar 06 17:56:48 2015 +0100
+++ b/develop/models/extractapp.php	Sun Mar 08 22:28:21 2015 +0100
@@ -8,7 +8,8 @@
 		return array("Index Value 1", "Value 2", "Value 3");
 	}
     
-    protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0;
+    protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, 
+        $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0, $taglist_infile = "", $book_meta = "";
     public $messages = "";
 
     private function Initialize($_urlvalues) {
@@ -193,26 +194,49 @@
     */
 
 
+    private function Taglist_infileUpToDate($taglistArray) {
+        // True iff each DB row array(id, name, tag, color) has an equal row with the same id in $this->taglist_infile.
+        $taglist_infile = $this->taglist_infile;
+        if (!is_array($taglist_infile) || count($taglist_infile) != count($taglistArray)) {
+            return false;   // also covers the "" default, i.e. no taglist was parsed from a file
+        }
 
+        $rows_by_id = array();      // file rows keyed by tag id, so row order does not matter
+        foreach ($taglist_infile as $row) {
+            $rows_by_id[(string)$row[0]] = $row;
+        }
+        foreach ($taglistArray as $row_indb) {   // each DB row needs one equal file row (not: all pairs equal)
+            $row = isset($rows_by_id[(string)$row_indb[0]]) ? $rows_by_id[(string)$row_indb[0]] : null;
+            if ($row === null || $row[1] != $row_indb[1] || $row[2] != $row_indb[2] || $row[3] != $row_indb[3]) {
+                return false;
+            }
+        }
+        return true;
+    }
     // === for tagging ===
     public function StartTagging() {
        
-        // $this->Initialize($urlvalues);
-
         $section_id = $this->section_id;
         $stringInput = $this->lg_text;
-        
+
+        $data = array();    // data array to be passed to view
 
         //$taglistArray = $this->GetTaglistArray();
         //for GetTaglistByTopicID: 
         $taglistArray = $this->GetTaglistByTopicID($this->GetTopic());
+        
+        // TODO: check if taglist_infile is up-to-date
+        if ( !$this->Taglist_infileUpToDate($taglistArray)) {
+            $data['taglist_infile'] = $this->taglist_infile;
+        } 
+        
+
         // topic list
         $topiclistArray = $this->GetTopiclistArray();
 
 
         $wordlistArray = $this->GetWordlistArray();
 
-        $data = array();
         $data['stringInput'] = $stringInput;
         $data['taglistArray'] = $taglistArray;
         $data['wordlistArray'] = $wordlistArray;
@@ -1119,6 +1143,31 @@
         return $data;
     }
 
+    private function ParseMetaData($filename) {
+        // Load a parsing file (XML), cache its topic id, taglist and book metadata on $this,
+        // and return the <text_content> element serialized as an XML string.
+        $text = file_get_contents($filename);
+        $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string");
+
+        // Store the topic id as a plain int rather than a SimpleXMLElement node, and keep
+        // the current topic when the file has no <topic> element at all.
+        if (isset($xml->topic)) {
+            $this->topic = (int)$xml->topic;
+        }
+
+        // get taglist in file as rows of array(id, name, tag, color), all strings
+        $taglistArray = array();
+        foreach ($xml->taglist as $row) {
+            array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color));
+        }
+        $this->taglist_infile = $taglistArray;
+
+        // TODO: get meta data of book (for now the raw <book> node is kept as-is)
+        $this->book_meta = $xml->book;
+
+        $contentString = $xml->text_content->asXML();
+        return $contentString;
+    }
     private function GetSectionContent() {
         $section_id = $this->GetSectionId();
         $section_info = $this->GetSectionInfo();
@@ -1130,8 +1179,10 @@
         $contentString="";
         $data_path = $this->GetDataPath();
         if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) {
-            $contentString=file_get_contents($data_path."parsing_files/".$section_id.".txt");
-            $stringInput = $contentString;
+            $filename = $data_path."parsing_files/".$section_id.".txt";
+             // --- parsing meta data
+            $stringInput = $this->ParseMetaData($filename);
+            // ----
 
             // if the text is from file system
             $stringInput = preg_replace("/ /u", "○", $stringInput);
--- a/develop/views/Extractapp/TaggingText.php	Fri Mar 06 17:56:48 2015 +0100
+++ b/develop/views/Extractapp/TaggingText.php	Sun Mar 08 22:28:21 2015 +0100
@@ -11,6 +11,7 @@
 $info = $viewmodel['info']; 
 $messages = $viewmodel['messages'];
 
+$taglist_infile = $viewmodel['taglist_infile'];
 
 ?>
 
@@ -61,7 +62,7 @@
 if (_GET['id']) {
     
     var info = JSON.parse('<?php echo json_encode($info) ?>');
-    var redirectUrl = "http://localhost:1080/localmonographs/develop/Extractapp/TaggingText";
+    var redirectUrl = "http://localhost:1080/extraction-interface/develop/Extractapp/TaggingText";
     var section_id = info['section_id'];
 
     var form = $('<form action="' + redirectUrl + '" method="post">' +
@@ -71,6 +72,18 @@
         $(form).submit();
 }
 // ====
+// --- check if taglist in file is up-to-date ---
+var check_taglist = <?php echo json_encode($taglist_infile) ?>;  // emitted as a JS literal; quoting it for JSON.parse broke on " or \ in a value
+if (check_taglist) {
+    // TODO: pop up to ask if load the latest taglist
+    var retVal = confirm("The tag list in the file is not up-to-date. \nDo you want to load file with new tag list?");
+    if( retVal == true ){
+        // TODO: request a reload with the new tag list; server-side code cannot branch on this client-side answer.
+        alert("Load with new tag list.");
+    }else{
+        alert("You are using the old version of tag list.");
+    }
+}