Mercurial > hg > extraction-interface
diff develop/models/extractapp.php @ 39:8347776a44fc extractapp
embedded topic, taglist,book metadata in file
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Sun, 08 Mar 2015 22:28:21 +0100 |
parents | 4b3da4802998 |
children | 2e938dc046db |
line wrap: on
line diff
--- a/develop/models/extractapp.php Fri Mar 06 17:56:48 2015 +0100 +++ b/develop/models/extractapp.php Sun Mar 08 22:28:21 2015 +0100 @@ -8,7 +8,8 @@ return array("Index Value 1", "Value 2", "Value 3"); } - protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0; + protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, + $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0, $taglist_infile = "", $book_meta = ""; public $messages = ""; private function Initialize($_urlvalues) { @@ -193,26 +194,49 @@ */ + private function Taglist_infileUpToDate($taglistArray) { + // compare $this->taglist_infile is the same as $taglistArray + $taglist_infile = $this->taglist_infile; + if (count($taglist_infile) != count($taglistArray)) { + return false; + } + foreach ($taglistArray as $row_indb) { + foreach ($taglist_infile as $row) { + //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) + if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { + + } else { + return false; + } + } + } + return true; + } // === for tagging === public function StartTagging() { - // $this->Initialize($urlvalues); - $section_id = $this->section_id; $stringInput = $this->lg_text; - + + $data = array(); // data array to be passed to view //$taglistArray = $this->GetTaglistArray(); //for GetTaglistByTopicID: $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); + + // TODO: check if taglist_infile is up-to-date + if ( !$this->Taglist_infileUpToDate($taglistArray)) { + $data['taglist_infile'] = $this->taglist_infile; + } + + // topic list $topiclistArray = $this->GetTopiclistArray(); $wordlistArray = $this->GetWordlistArray(); - $data = array(); $data['stringInput'] = $stringInput; $data['taglistArray'] = $taglistArray; $data['wordlistArray'] = $wordlistArray; @@ -1119,6 +1143,31 @@ return $data; } + private function 
ParseMetaData($filename) { + $text = file_get_contents($filename); + $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); + + $this->topic = $xml->topic; // set topic id + + // get taglist in file + $taglist_infile = $xml->taglist; + $taglistArray = array(); + foreach ($taglist_infile as $row) { + array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color)); + } + $this->taglist_infile = $taglistArray; + + // TODO: get meta data of book + $this->book_meta = $xml->book; + + // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; + // --- detect if the taglist set is up-to-date or not --- + + + $contentString = $xml->text_content->asXML(); + + return $contentString; + } private function GetSectionContent() { $section_id = $this->GetSectionId(); $section_info = $this->GetSectionInfo(); @@ -1130,8 +1179,10 @@ $contentString=""; $data_path = $this->GetDataPath(); if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { - $contentString=file_get_contents($data_path."parsing_files/".$section_id.".txt"); - $stringInput = $contentString; + $filename = $data_path."parsing_files/".$section_id.".txt"; + // --- parsing meta data + $stringInput = $this->ParseMetaData($filename); + // ---- // if the text is from file system $stringInput = preg_replace("/ /u", "○", $stringInput);