Mercurial > hg > extraction-interface
diff develop/models/extractapp.php @ 40:2e938dc046db extractapp
load,save xml file with topic, etc.
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 10 Mar 2015 13:46:16 +0100 |
parents | 8347776a44fc |
children | 533a6c39c128 |
line wrap: on
line diff
--- a/develop/models/extractapp.php Sun Mar 08 22:28:21 2015 +0100 +++ b/develop/models/extractapp.php Tue Mar 10 13:46:16 2015 +0100 @@ -18,13 +18,6 @@ } public function GetTextFromFileId($_postdata) { - /* - if ($this->current_fileId != 0) { - $this->file_id = $this->current_fileId; - } else { - $this->file_id = $_postdata['fileId']; - } - */ $this->file_id = $_postdata['fileId']; $branch_id = $_postdata['branchId']; $section_id = $_postdata['sectionId']; @@ -33,15 +26,18 @@ $this->user_id = $_postdata['userId']; $this->section_id = $section_id; - $this->section_name = $_postdata['sectionName']; $this->book_id = $_postdata['bookId']; $this->book_name = $_postdata['bookName']; - // get from URL with file_id $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; - $lg_text = file_get_contents($lg_text_url); + //$lg_text = file_get_contents($lg_text_url); + // --- parsing meta data + $lg_text = $this->ParseMetaData($lg_text_url); + + // ---- + $stringInput = $lg_text; $stringInput = preg_replace("/ /u", "○", $stringInput); @@ -54,7 +50,6 @@ } public function GetTextFromSectionId($_postdata) { - $section_id = $_postdata['sectionId']; $this->section_id = $section_id; $this->user_id = $_postdata['userId']; @@ -65,7 +60,6 @@ // get from URL with file_id $lg_text_url = $this->get_text_from_sectionId_url.$section_id; - $lg_text = file_get_contents($lg_text_url); $stringInput = $lg_text; @@ -118,8 +112,12 @@ if ($_postdata['currentFileId']) { $this->current_fileId = $_postdata['currentFileId']; } - - + if ($_postdata['taglistArray']) { + $this->taglist_infile = json_decode($_postdata['taglistArray']); + } + if ($_postdata['book_meta']) { + $this->book_meta = json_decode($_postdata['book_meta']); + } } public function InitData($_postdata) { @@ -194,6 +192,7 @@ */ + // TODO: comparison not correct private function Taglist_infileUpToDate($taglistArray) { // compare $this->taglist_infile is the same as $taglistArray $taglist_infile = $this->taglist_infile; @@ -202,14 
+201,18 @@ } foreach ($taglistArray as $row_indb) { + $cnt = 0; foreach ($taglist_infile as $row) { //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { } else { - return false; + $cnt ++; } } + if ($cnt == count($row)) { + return false; + } } return true; } @@ -225,11 +228,18 @@ //for GetTaglistByTopicID: $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); + $data['taglist_infile'] = $this->taglist_infile; // TODO: check if taglist_infile is up-to-date + /* if ( !$this->Taglist_infileUpToDate($taglistArray)) { $data['taglist_infile'] = $this->taglist_infile; - } + } else { + $data['taglist_infile'] = ""; + } + */ + // book_meta + $data['book_meta'] = $this->book_meta; // topic list $topiclistArray = $this->GetTopiclistArray(); @@ -338,6 +348,38 @@ } + private function AppendMetaData($text_content) { + $text = '<?xml version="1.0" encoding="UTF-8"?>'; + $text .= "\n<text>\n"; + // topic + $text .= "<topic>".$this->topic."</topic>\n"; + // book meta data + $book = $this->book_meta; + foreach ($book as $b) { + $text .= "<book>\n"; + $text .= "<title>".$b[0]."</title>\n"; + $text .= "<author>".$b[1]."</author>\n"; + $text .= "<year>".$b[2]."</year>\n"; + $text .= "<pagenumber>".$b[3]."</pagenumber>\n"; + $text .= "</book>\n"; + } + // taglist + $taglist = $this->taglist_infile; + foreach ($taglist as $tagitem) { + $text .= "<tagitem>\n"; + $text .= "<id>".$tagitem[0]."</id>\n"; + $text .= "<name>".$tagitem[1]."</name>\n"; + $text .= "<tag>".$tagitem[2]."</tag>\n"; + $text .= "<color>".$tagitem[3]."</color>\n"; + $text .= "</tagitem>\n"; + } + + // text_content + $text .= $text_content; + $text .= "\n</text>"; + + return $text; + } public function SaveFullTextToLGService($_postdata) { // save tagged text (full text) by Jorge's API to lg service @@ -355,12 +397,20 @@ } else { $require = $_postdata['text']; } - + $require = 
preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require); $require = preg_replace('/&amp;/u', "&", $require); $require = preg_replace("/○/u", " ", $require); $require = preg_replace("/<br>/u", "\n", $require); - $require = preg_replace("/<br>/u", "\n", $require); + //$require = preg_replace("/<br>/u", "\n", $require); + + // TODO: append metadata at the beginning of file + if ($_postdata['branchId'] == 0) { + // -- new branch case + $require = "<text_content>\n".$require."</text_content>\n"; + } + $require = $this->AppendMetaData($require); + //saving in my local machine in developing phrase file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); } @@ -409,8 +459,9 @@ )); // execute the request - $output = curl_exec($ch); - + // **** commended to DEBUG *** + //$output = curl_exec($ch); + // ***** // output the profile information - includes the header //echo($output) . PHP_EOL; @@ -1147,24 +1198,34 @@ $text = file_get_contents($filename); $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); - $this->topic = $xml->topic; // set topic id + $this->topic = (string)$xml->topic; // set topic id // get taglist in file - $taglist_infile = $xml->taglist; + $taglist_infile = $xml->tagitem; $taglistArray = array(); foreach ($taglist_infile as $row) { - array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color)); + array_push($taglistArray, array((string)$row->id,(string)$row->name,(string)$row->tag,(string)$row->color )); + } + if ($taglistArray) { + $this->taglist_infile = $taglistArray; } - $this->taglist_infile = $taglistArray; - // TODO: get meta data of book - $this->book_meta = $xml->book; + // get book meta data + $book_meta = $xml->book; + $book_metaArray = array(); + foreach ($book_meta as $row) { + //array_push($book_metaArray, array('title'=>(string)$row->title,'author'=>(string)$row->author,'year'=>(string)$row->year,'pagenumber'=>(string)$row->pagenumber )); + 
array_push($book_metaArray, array((string)$row->title,(string)$row->author,(string)$row->year,(string)$row->pagenumber )); + } + if ($book_metaArray) { + $this->book_meta = $book_metaArray; + } // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; // --- detect if the taglist set is up-to-date or not --- - - - $contentString = $xml->text_content->asXML(); + $contentString = (string)$xml->text_content->asXML(); + //$removed_str = array("<text_content>","</text_content>"); + //$new_contentString = str_replace($removed_str, "", $contentString); return $contentString; } @@ -1180,7 +1241,8 @@ $data_path = $this->GetDataPath(); if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { $filename = $data_path."parsing_files/".$section_id.".txt"; - // --- parsing meta data + + // --- parsing meta data $stringInput = $this->ParseMetaData($filename); // ----