changeset 40:2e938dc046db extractapp

load,save xml file with topic, etc.
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 10 Mar 2015 13:46:16 +0100
parents 8347776a44fc
children 533a6c39c128
files develop/config/config.php develop/data/parsing_files/1.txt develop/models/extractapp.php develop/views/Extractapp/TaggingText.php
diffstat 4 files changed, 235 insertions(+), 99 deletions(-) [+]
line wrap: on
line diff
--- a/develop/config/config.php	Sun Mar 08 22:28:21 2015 +0100
+++ b/develop/config/config.php	Tue Mar 10 13:46:16 2015 +0100
@@ -1,13 +1,24 @@
 <?php
 
-// localhost
-$systemNAME = "interface";
-$mysql_database = "Gazetteers";
-$mysql_server = "localhost";
-$mysql_user = "root";
-$mysql_password = "root";
+$at_local = false;
 
-$system_root_url = "http://localhost:1080/extraction-interface/develop";
+if (!$at_local) {
+	// host at localgazetteers-dev server
+	$systemNAME = "interface";
+	$mysql_database = "Gazetteer";
+	$mysql_server = "localhost";
+	$mysql_user = "root";
+	$mysql_password = "admin";
+	$system_root_url = "http://localgazetteers-dev/extraction-interface/develop";
 
+} else {
+	// localhost
+	$systemNAME = "interface";
+	$mysql_database = "Gazetteers";
+	$mysql_server = "localhost";
+	$mysql_user = "root";
+	$mysql_password = "root";
+	$system_root_url = "http://localhost:1080/extraction-interface/develop";
+}
 
 ?>
--- a/develop/data/parsing_files/1.txt	Sun Mar 08 22:28:21 2015 +0100
+++ b/develop/data/parsing_files/1.txt	Tue Mar 10 13:46:16 2015 +0100
@@ -1,34 +1,43 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <text>
-  <topic>2</topic>
-  <book category="localgazetteer">
-    <title>南陵縣志</title>
-    <author>qwer</author>
-    <year>1999</year>
-    <pagenumber>333</pagenumber>
-  </book>
-
-  <taglist>
-    <id>2</id>
-    <name>人名</name>
-    <tag>person</tag>
-    <color>rgb(8, 148, 255)</color>
-  </taglist>
-  <taglist>
-    <id>29</id>
-    <name>入仕時間</name>
-    <tag>entry_time</tag>
-    <color>rgb(174, 179, 39)</color>
-  </taglist>
-  <taglist>
-    <id>31</id>
-    <name>物產名稱</name>
-    <tag>product_name</tag>
-    <color>rgb(227, 63, 227)</color>
-  </taglist>
-
-  <text_content>
-  
+<topic>2</topic>
+<book>
+<title>南陵縣志</title>
+<author>qwer</author>
+<year>1999</year>
+<pagenumber>333</pagenumber>
+</book>
+<tagitem>
+<id>31</id>
+<name>物產名稱</name>
+<tag>product_name</tag>
+<color>rgb(227, 63, 227)</color>
+</tagitem>
+<tagitem>
+<id>32</id>
+<name>屬</name>
+<tag>shu</tag>
+<color>rgb(143, 14, 143)</color>
+</tagitem>
+<tagitem>
+<id>17</id>
+<name>無意義</name>
+<tag>null</tag>
+<color>rgb(219, 219, 219)</color>
+</tagitem>
+<tagitem>
+<id>34</id>
+<name>物產產地</name>
+<tag>product_place</tag>
+<color>rgb(7, 64, 209)</color>
+</tagitem>
+<tagitem>
+<id>26</id>
+<name>別名</name>
+<tag>othername</tag>
+<color>rgb(141, 212, 224)</color>
+</tagitem>
+<text_content>
   【102】    契本工課鈔肆貫     
 物産    按淳属嚴郡爲
 〈古揚州之境〉而其物産之見於經    者曰陽鳥攸居而已曰篠簜既敷而已曰厥草惟    夭厥木惟喬而已固今日諸郡縣之所同而非止    嚴陵属縣爲然也謹擇其有資於民生服食噐用    
@@ -43,7 +52,5 @@
 <shu>〈糯〉</shu>〈糯〉有
 <shu>〈糯〉</shu><product_name>秋糯</product_name>
 <shu>〈糯〉</shu><product_name>晩糯</product_name>  
-
-
-  </text_content>
+</text_content>
 </text>
\ No newline at end of file
--- a/develop/models/extractapp.php	Sun Mar 08 22:28:21 2015 +0100
+++ b/develop/models/extractapp.php	Tue Mar 10 13:46:16 2015 +0100
@@ -18,13 +18,6 @@
     }
 
     public function GetTextFromFileId($_postdata) {
-        /*
-        if ($this->current_fileId != 0) {
-            $this->file_id = $this->current_fileId;
-        } else {
-            $this->file_id = $_postdata['fileId'];
-        }
-        */
         $this->file_id = $_postdata['fileId'];
         $branch_id = $_postdata['branchId'];
         $section_id = $_postdata['sectionId'];
@@ -33,15 +26,18 @@
         $this->user_id = $_postdata['userId'];
         $this->section_id = $section_id;
 
-
         $this->section_name = $_postdata['sectionName'];
         $this->book_id = $_postdata['bookId'];
         $this->book_name = $_postdata['bookName'];
 
-        
         // get from URL with file_id
         $lg_text_url = $this->get_text_from_fileId_url.$this->file_id;
-        $lg_text = file_get_contents($lg_text_url);
+        //$lg_text = file_get_contents($lg_text_url);
+        // --- parsing meta data
+        $lg_text = $this->ParseMetaData($lg_text_url);
+
+        // ----
+
 
         $stringInput = $lg_text;
         $stringInput = preg_replace("/ /u", "○", $stringInput);
@@ -54,7 +50,6 @@
     }
 
     public function GetTextFromSectionId($_postdata) {
-        
         $section_id = $_postdata['sectionId'];
         $this->section_id = $section_id;
         $this->user_id = $_postdata['userId'];
@@ -65,7 +60,6 @@
 
         // get from URL with file_id
         $lg_text_url = $this->get_text_from_sectionId_url.$section_id;
-
         $lg_text = file_get_contents($lg_text_url);
 
         $stringInput = $lg_text;
@@ -118,8 +112,12 @@
         if ($_postdata['currentFileId']) {
             $this->current_fileId = $_postdata['currentFileId'];
         }
-
-       
+        if ($_postdata['taglistArray']) {
+            $this->taglist_infile = json_decode($_postdata['taglistArray']);
+        }
+        if ($_postdata['book_meta']) {
+            $this->book_meta = json_decode($_postdata['book_meta']);
+        }
 
     }
     public function InitData($_postdata) {
@@ -194,6 +192,7 @@
     */
 
 
+    // TODO: comparison not correct
     private function Taglist_infileUpToDate($taglistArray) {
         // compare $this->taglist_infile is the same as $taglistArray
         $taglist_infile = $this->taglist_infile;
@@ -202,14 +201,18 @@
         }
 
         foreach ($taglistArray as $row_indb) {
+            $cnt = 0;
             foreach ($taglist_infile as $row) {
                 //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] )
                 if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) {
 
                 } else {
-                    return false;
+                    $cnt ++;
                 }
             }
+            if ($cnt == count($row)) {
+                return false;
+            }
         }
         return true;
     }
@@ -225,11 +228,18 @@
         //for GetTaglistByTopicID: 
         $taglistArray = $this->GetTaglistByTopicID($this->GetTopic());
         
+        $data['taglist_infile'] = $this->taglist_infile;
         // TODO: check if taglist_infile is up-to-date
+        /*
         if ( !$this->Taglist_infileUpToDate($taglistArray)) {
             $data['taglist_infile'] = $this->taglist_infile;
-        } 
+        } else {
+            $data['taglist_infile'] = "";
+        }
+        */
         
+        // book_meta
+        $data['book_meta'] = $this->book_meta;
 
         // topic list
         $topiclistArray = $this->GetTopiclistArray();
@@ -338,6 +348,38 @@
         
 
     }
+    /**
+     * Wrap $text_content in the <text> XML envelope (topic, book, tagitem entries).
+     * Values are XML-escaped so '&' or '<' in metadata cannot break later parsing.
+     * @param string $text_content Already-serialized <text_content> block, appended as-is.
+     * @return string Complete XML document.
+     */
+    private function AppendMetaData($text_content) {
+        $esc = function ($v) { return htmlspecialchars((string)$v, ENT_QUOTES, 'UTF-8'); };
+        $text = '<?xml version="1.0" encoding="UTF-8"?>'."\n<text>\n";
+        $text .= "<topic>".$esc($this->topic)."</topic>\n";
+        // (array) cast: book_meta / taglist_infile are only assigned conditionally and may be empty
+        foreach ((array)$this->book_meta as $b) {
+            $text .= "<book>\n";
+            $text .= "<title>".$esc($b[0])."</title>\n";
+            $text .= "<author>".$esc($b[1])."</author>\n";
+            $text .= "<year>".$esc($b[2])."</year>\n";
+            $text .= "<pagenumber>".$esc($b[3])."</pagenumber>\n";
+            $text .= "</book>\n";
+        }
+        foreach ((array)$this->taglist_infile as $tagitem) {
+            $text .= "<tagitem>\n";
+            $text .= "<id>".$esc($tagitem[0])."</id>\n";
+            $text .= "<name>".$esc($tagitem[1])."</name>\n";
+            $text .= "<tag>".$esc($tagitem[2])."</tag>\n";
+            $text .= "<color>".$esc($tagitem[3])."</color>\n";
+            $text .= "</tagitem>\n";
+        }
+        // text_content is expected to carry its own <text_content> wrapper; it is not escaped here
+        $text .= $text_content;
+        $text .= "\n</text>";
+        return $text;
+    }
     public function SaveFullTextToLGService($_postdata) {
         // save tagged text (full text) by Jorge's API to lg service
 
@@ -355,12 +397,20 @@
             } else {
                 $require = $_postdata['text'];
             }
-            
+ 
             $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require);
             $require = preg_replace('/&amp;/u', "&", $require); 
             $require = preg_replace("/○/u", " ", $require); 
             $require = preg_replace("/<br>/u", "\n", $require);
-            $require = preg_replace("/<br>/u", "\n", $require);
+            //$require = preg_replace("/<br>/u", "\n", $require);
+
+            // TODO: append metadata at the beginning of file
+            if ($_postdata['branchId'] == 0) {
+                // -- new branch case
+                $require = "<text_content>\n".$require."</text_content>\n";
+            }
+            $require = $this->AppendMetaData($require);
+                
             //saving in my local machine in developing phrase
             file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require);
         } 
@@ -409,8 +459,9 @@
         ));       
 
         // execute the request
-        $output = curl_exec($ch);
-
+        // **** commented out to DEBUG ***
+        //$output = curl_exec($ch);
+        // *****
 
         // output the profile information - includes the header
         //echo($output) . PHP_EOL;
@@ -1147,24 +1198,34 @@
         $text = file_get_contents($filename);
         $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string");
 
-        $this->topic = $xml->topic; // set topic id
+        $this->topic = (string)$xml->topic; // set topic id
 
         // get taglist in file
-        $taglist_infile = $xml->taglist;
+        $taglist_infile = $xml->tagitem;
         $taglistArray = array();
         foreach ($taglist_infile as $row) {
-            array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color));
+            array_push($taglistArray, array((string)$row->id,(string)$row->name,(string)$row->tag,(string)$row->color ));
+        }
+        if ($taglistArray) {
+            $this->taglist_infile = $taglistArray; 
         }
-        $this->taglist_infile = $taglistArray; 
 
-        // TODO: get meta data of book
-        $this->book_meta = $xml->book;
+        // get book meta data
+        $book_meta = $xml->book;
+        $book_metaArray = array();
+        foreach ($book_meta as $row) {
+            //array_push($book_metaArray, array('title'=>(string)$row->title,'author'=>(string)$row->author,'year'=>(string)$row->year,'pagenumber'=>(string)$row->pagenumber ));
+            array_push($book_metaArray, array((string)$row->title,(string)$row->author,(string)$row->year,(string)$row->pagenumber ));
+        }
+        if ($book_metaArray) {
+            $this->book_meta = $book_metaArray;
+        }
 
         // echo $taglist->name.", ".$taglist->tag."," .$taglist->color;
         // --- detect if the taglist set is up-to-date or not ---
-
-
-        $contentString = $xml->text_content->asXML();
+        $contentString = (string)$xml->text_content->asXML();
+        //$removed_str = array("<text_content>","</text_content>");
+        //$new_contentString = str_replace($removed_str, "", $contentString);
 
         return $contentString;
     }
@@ -1180,7 +1241,8 @@
         $data_path = $this->GetDataPath();
         if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) {
             $filename = $data_path."parsing_files/".$section_id.".txt";
-             // --- parsing meta data
+            
+            // --- parsing meta data
             $stringInput = $this->ParseMetaData($filename);
             // ----
 
--- a/develop/views/Extractapp/TaggingText.php	Sun Mar 08 22:28:21 2015 +0100
+++ b/develop/views/Extractapp/TaggingText.php	Tue Mar 10 13:46:16 2015 +0100
@@ -12,6 +12,8 @@
 $messages = $viewmodel['messages'];
 
 $taglist_infile = $viewmodel['taglist_infile'];
+$book_meta = $viewmodel['book_meta'];
+
 
 ?>
 
@@ -38,13 +40,12 @@
 	font-size:21px;
 }
 
-
 <?php
 // color on the tags
 foreach ( $taglistArray as $taglistValue ) {
-	echo $taglistValue[2]."\n{\ncolor:".$taglistValue[3]."\n}\n";
-	
-	echo ".span_".$taglistValue[2]."\n{\nbackground-color:".$taglistValue[3]."\n}\n";
+    echo $taglistValue[2]."\n{\ncolor:".$taglistValue[3]."\n}\n";
+    
+    echo ".span_".$taglistValue[2]."\n{\nbackground-color:".$taglistValue[3]."\n}\n";
 }
 
 ?>
@@ -57,36 +58,28 @@
 
 <script type="text/javascript">
 
-// === This is only for developing on local machine ====
-var _GET = JSON.parse('<?php echo json_encode($_GET) ?>');
-if (_GET['id']) {
-    
-    var info = JSON.parse('<?php echo json_encode($info) ?>');
-    var redirectUrl = "http://localhost:1080/extraction-interface/develop/Extractapp/TaggingText";
-    var section_id = info['section_id'];
+// not used for now
+function _showTagColor(up_to_date) {
+    var taglistArray = "";
+    if (up_to_date) {
+        taglistArray = JSON.parse('<?php echo json_encode($taglistArray) ?>');
+    } else {
+        taglistArray = JSON.parse('<?php echo json_encode($taglist_infile) ?>');
+    }
+
 
-    var form = $('<form action="' + redirectUrl + '" method="post">' +
-        '<input type="hidden" name="sectionId" value="'+section_id+'" />' +
-        '</form>');
-        $('body').append(form);
-        $(form).submit();
-}
-// ====
-// --- check if taglist in file is up-to-date ---
-var check_taglist = JSON.parse('<?php echo json_encode($taglist_infile) ?>');
-if (check_taglist) {
-    // TODO: pop up to ask if load the latest taglist
-    var retVal = confirm("The tag list in the file is not up-to-date. \nDo you want to load file with new tag list?");
-    if( retVal == true ){
-        <?php $$taglistArray = $taglist_infile; ?>
-        alert("Load with new tag list.");
-    }else{
-        alert("You are using the old version of tag list.");
+    for (var i = 0; i < taglistArray.length; i++) {
+        var taglistValue = taglistArray[i];
+        
+        $(taglistValue[2]).css('color', taglistValue[3]);
+        /*var element = document.querySelectorAll(taglistValue[2]);
+        for (var j = 0; j < element.length; j++) {
+            element[j].style.color = taglistValue[3];
+        }
+        */
     }
 }
 
-
-
 // TODO: Popup to proceeding the saving situations
 function handleFileVersionConflict() {
     var info = JSON.parse('<?php echo json_encode($info) ?>');
@@ -206,6 +199,42 @@
         el.stop().animate({'top':finaldestination},0);
     });
 
+    // --- handle old version taglist ---
+    var _GET = JSON.parse('<?php echo json_encode($_GET) ?>');
+    if (_GET['id']) {
+        // === This case should only be possible for developing on local machine ==== 
+        var info = JSON.parse('<?php echo json_encode($info) ?>');
+        var redirectUrl = "http://localhost:1080/extraction-interface/develop/Extractapp/TaggingText";
+        var section_id = info['section_id'];
+
+        var form = $('<form action="' + redirectUrl + '" method="post">' +
+            '<input type="hidden" name="sectionId" value="'+section_id+'" />' +
+            '</form>');
+            $('body').append(form);
+            $(form).submit();
+    } else {
+        // --- TODO: check if taglist in file is up-to-date ---
+        /*
+        var taglist_infile = JSON.parse('<?php echo json_encode($taglist_infile) ?>');
+        if (taglist_infile != "") {
+            // TODO: pop up to ask if load the latest taglist
+            var retVal = confirm("Tag list in file is not up-to-date. \nDo you want to load file with new tag list?");
+            if( retVal == true ){
+                // use $taglisArray
+                alert("Load file with new tag list.");
+            }else{
+                // use $taglist_infile as $taglistArray
+                <?php 
+                // $taglistArray = $taglist_infile; 
+                ?>
+                alert("You are viewing the file with old version of tag list. Please use the up-to-date taglist.");
+            }
+            //_showTagColor(retVal);
+            <?php $taglist_infile = ""; ?>
+        }
+        */
+    }
+
 
 
     handleFileVersionConflict();
@@ -260,6 +289,10 @@
         if ( container.innerHTML.indexOf( "br" ) != -1 ) {
             var newselect = document.createElement("select");
             newselect.id = "TitletagType";
+            
+
+            console.log(JSON.parse('<?php echo json_encode($taglistArray) ?>'));
+
             <?php 
                 foreach ( $taglistArray as $taglistValue ) {
                     echo "newselect.innerHTML += \"<option value='".$taglistValue[2]."'>".$taglistValue[1]."</option>\";\n";
@@ -796,6 +829,26 @@
     hiddenField.setAttribute("value", text);
     form.appendChild(hiddenField);
 
+    // pass taglist
+    var hiddenField = document.createElement("input");      
+    hiddenField.setAttribute("name", "taglistArray");
+    var taglistObj = JSON.parse('<?php echo json_encode($taglist_infile) ?>');
+    if (!taglistObj) {
+        taglistObj = JSON.parse('<?php echo json_encode($taglistArray) ?>');
+    }
+    taglistArray = JSON.stringify(taglistObj);
+    hiddenField.setAttribute("value", taglistArray);
+    form.appendChild(hiddenField);
+
+    // pass book_meta
+    var hiddenField = document.createElement("input");      
+    hiddenField.setAttribute("name", "book_meta");
+    var book_metaObj = JSON.parse('<?php echo json_encode($book_meta) ?>');
+    book_metaArray = JSON.stringify(book_metaObj);
+    hiddenField.setAttribute("value", book_metaArray);
+    form.appendChild(hiddenField);    
+
+
     var info = JSON.parse( '<?php echo json_encode($info) ?>');
 
     if (info) {
@@ -990,7 +1043,10 @@
                     
                         <!--<button onclick="cleanUpTextArea()" style="height: 30px; width: 220px">Reform the text</button></br>-->
                         <!-- save text in local. replaced by saveTextToLGService, which is also saving on copy in local in development stage -->
+                        <!-- 
                         <button onclick="saveText(<?php echo $section_id; ?>)" style="height: 30px; width: 220px">Save the text</button></br>
+                        -->
+
                         <button onclick="editText()" id="editTextId" style="height: 30px; width: 220px">Edit the text</button></br>
                         </br>
                         <button onclick="window.open('./EditWordlist')" style="height: 30px; width: 220px">Manage Word List</button></br>