diff develop/models/extractapp.php @ 40:2e938dc046db extractapp

load,save xml file with topic, etc.
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 10 Mar 2015 13:46:16 +0100
parents 8347776a44fc
children 533a6c39c128
line wrap: on
line diff
--- a/develop/models/extractapp.php	Sun Mar 08 22:28:21 2015 +0100
+++ b/develop/models/extractapp.php	Tue Mar 10 13:46:16 2015 +0100
@@ -18,13 +18,6 @@
     }
 
     public function GetTextFromFileId($_postdata) {
-        /*
-        if ($this->current_fileId != 0) {
-            $this->file_id = $this->current_fileId;
-        } else {
-            $this->file_id = $_postdata['fileId'];
-        }
-        */
         $this->file_id = $_postdata['fileId'];
         $branch_id = $_postdata['branchId'];
         $section_id = $_postdata['sectionId'];
@@ -33,15 +26,18 @@
         $this->user_id = $_postdata['userId'];
         $this->section_id = $section_id;
 
-
         $this->section_name = $_postdata['sectionName'];
         $this->book_id = $_postdata['bookId'];
         $this->book_name = $_postdata['bookName'];
 
-        
         // get from URL with file_id
         $lg_text_url = $this->get_text_from_fileId_url.$this->file_id;
-        $lg_text = file_get_contents($lg_text_url);
+        //$lg_text = file_get_contents($lg_text_url);
+        // --- parsing meta data
+        $lg_text = $this->ParseMetaData($lg_text_url);
+
+        // ----
+
 
         $stringInput = $lg_text;
         $stringInput = preg_replace("/ /u", "○", $stringInput);
@@ -54,7 +50,6 @@
     }
 
     public function GetTextFromSectionId($_postdata) {
-        
         $section_id = $_postdata['sectionId'];
         $this->section_id = $section_id;
         $this->user_id = $_postdata['userId'];
@@ -65,7 +60,6 @@
 
         // get from URL with file_id
         $lg_text_url = $this->get_text_from_sectionId_url.$section_id;
-
         $lg_text = file_get_contents($lg_text_url);
 
         $stringInput = $lg_text;
@@ -118,8 +112,12 @@
         if ($_postdata['currentFileId']) {
             $this->current_fileId = $_postdata['currentFileId'];
         }
-
-       
+        if ($_postdata['taglistArray']) {
+            $this->taglist_infile = json_decode($_postdata['taglistArray']);
+        }
+        if ($_postdata['book_meta']) {
+            $this->book_meta = json_decode($_postdata['book_meta']);
+        }
 
     }
     public function InitData($_postdata) {
@@ -194,6 +192,7 @@
     */
 
 
+    // TODO: comparison not correct
     private function Taglist_infileUpToDate($taglistArray) {
         // compare $this->taglist_infile is the same as $taglistArray
         $taglist_infile = $this->taglist_infile;
@@ -202,14 +201,18 @@
         }
 
         foreach ($taglistArray as $row_indb) {
+            $cnt = 0;
             foreach ($taglist_infile as $row) {
                 //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] )
                 if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) {
 
                 } else {
-                    return false;
+                    $cnt ++;
                 }
             }
+            if ($cnt == count($row)) {
+                return false;
+            }
         }
         return true;
     }
@@ -225,11 +228,18 @@
         //for GetTaglistByTopicID: 
         $taglistArray = $this->GetTaglistByTopicID($this->GetTopic());
         
+        $data['taglist_infile'] = $this->taglist_infile;
         // TODO: check if taglist_infile is up-to-date
+        /*
         if ( !$this->Taglist_infileUpToDate($taglistArray)) {
             $data['taglist_infile'] = $this->taglist_infile;
-        } 
+        } else {
+            $data['taglist_infile'] = "";
+        }
+        */
         
+        // book_meta
+        $data['book_meta'] = $this->book_meta;
 
         // topic list
         $topiclistArray = $this->GetTopiclistArray();
@@ -338,6 +348,38 @@
         
 
     }
+    /**
+     * Build the XML document to save: topic, book and tag metadata followed by $text_content.
+     * Metadata values are XML-escaped so the result stays well-formed when it is re-read with
+     * simplexml_load_string(); $text_content is appended verbatim because it is already markup.
+     */
+    private function AppendMetaData($text_content) {
+        $text = '<?xml version="1.0" encoding="UTF-8"?>';
+        $text .= "\n<text>\n";
+        $text .= "<topic>".htmlspecialchars($this->topic, ENT_NOQUOTES, 'UTF-8')."</topic>\n";
+        // book meta rows: array(title, author, year, pagenumber); property may be unset
+        $book = $this->book_meta ? $this->book_meta : array();
+        foreach ($book as $b) {
+            $text .= "<book>\n";
+            $text .= "<title>".htmlspecialchars($b[0], ENT_NOQUOTES, 'UTF-8')."</title>\n";
+            $text .= "<author>".htmlspecialchars($b[1], ENT_NOQUOTES, 'UTF-8')."</author>\n";
+            $text .= "<year>".htmlspecialchars($b[2], ENT_NOQUOTES, 'UTF-8')."</year>\n";
+            $text .= "<pagenumber>".htmlspecialchars($b[3], ENT_NOQUOTES, 'UTF-8')."</pagenumber>\n";
+            $text .= "</book>\n";
+        }
+        // taglist rows: array(id, name, tag, color); property may be unset
+        $taglist = $this->taglist_infile ? $this->taglist_infile : array();
+        foreach ($taglist as $tagitem) {
+            $text .= "<tagitem>\n";
+            $text .= "<id>".htmlspecialchars($tagitem[0], ENT_NOQUOTES, 'UTF-8')."</id>\n";
+            $text .= "<name>".htmlspecialchars($tagitem[1], ENT_NOQUOTES, 'UTF-8')."</name>\n";
+            $text .= "<tag>".htmlspecialchars($tagitem[2], ENT_NOQUOTES, 'UTF-8')."</tag>\n";
+            $text .= "<color>".htmlspecialchars($tagitem[3], ENT_NOQUOTES, 'UTF-8')."</color>\n";
+            $text .= "</tagitem>\n";
+        }
+        // text_content is not escaped: it carries the tagged text as XML/markup
+        return $text.$text_content."\n</text>";
+    }
     public function SaveFullTextToLGService($_postdata) {
         // save tagged text (full text) by Jorge's API to lg service
 
@@ -355,12 +397,20 @@
             } else {
                 $require = $_postdata['text'];
             }
-            
+ 
             $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require);
             $require = preg_replace('/&amp;/u', "&", $require); 
             $require = preg_replace("/○/u", " ", $require); 
             $require = preg_replace("/<br>/u", "\n", $require);
-            $require = preg_replace("/<br>/u", "\n", $require);
+            //$require = preg_replace("/<br>/u", "\n", $require);
+
+            // TODO: append metadata at the beginning of file
+            if ($_postdata['branchId'] == 0) {
+                // -- new branch case
+                $require = "<text_content>\n".$require."</text_content>\n";
+            }
+            $require = $this->AppendMetaData($require);
+                
             //saving in my local machine in developing phrase
             file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require);
         } 
@@ -409,8 +459,9 @@
         ));       
 
         // execute the request
-        $output = curl_exec($ch);
-
+        // **** commented out to DEBUG ***
+        //$output = curl_exec($ch);
+        // *****
 
         // output the profile information - includes the header
         //echo($output) . PHP_EOL;
@@ -1147,24 +1198,34 @@
         $text = file_get_contents($filename);
         $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string");
 
-        $this->topic = $xml->topic; // set topic id
+        $this->topic = (string)$xml->topic; // set topic id
 
         // get taglist in file
-        $taglist_infile = $xml->taglist;
+        $taglist_infile = $xml->tagitem;
         $taglistArray = array();
         foreach ($taglist_infile as $row) {
-            array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color));
+            array_push($taglistArray, array((string)$row->id,(string)$row->name,(string)$row->tag,(string)$row->color ));
+        }
+        if ($taglistArray) {
+            $this->taglist_infile = $taglistArray; 
         }
-        $this->taglist_infile = $taglistArray; 
 
-        // TODO: get meta data of book
-        $this->book_meta = $xml->book;
+        // get book meta data
+        $book_meta = $xml->book;
+        $book_metaArray = array();
+        foreach ($book_meta as $row) {
+            //array_push($book_metaArray, array('title'=>(string)$row->title,'author'=>(string)$row->author,'year'=>(string)$row->year,'pagenumber'=>(string)$row->pagenumber ));
+            array_push($book_metaArray, array((string)$row->title,(string)$row->author,(string)$row->year,(string)$row->pagenumber ));
+        }
+        if ($book_metaArray) {
+            $this->book_meta = $book_metaArray;
+        }
 
         // echo $taglist->name.", ".$taglist->tag."," .$taglist->color;
         // --- detect if the taglist set is up-to-date or not ---
-
-
-        $contentString = $xml->text_content->asXML();
+        $contentString = (string)$xml->text_content->asXML();
+        //$removed_str = array("<text_content>","</text_content>");
+        //$new_contentString = str_replace($removed_str, "", $contentString);
 
         return $contentString;
     }
@@ -1180,7 +1241,8 @@
         $data_path = $this->GetDataPath();
         if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) {
             $filename = $data_path."parsing_files/".$section_id.".txt";
-             // --- parsing meta data
+            
+            // --- parsing meta data
             $stringInput = $this->ParseMetaData($filename);
             // ----