Mercurial > hg > extraction-interface
changeset 39:8347776a44fc extractapp
embedded topic, taglist,book metadata in file
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Sun, 08 Mar 2015 22:28:21 +0100 |
parents | 4b3da4802998 |
children | 2e938dc046db |
files | develop/config/config.php develop/data/parsing_files/1.txt develop/models/extractapp.php develop/views/Extractapp/TaggingText.php |
diffstat | 4 files changed, 125 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/develop/config/config.php Fri Mar 06 17:56:48 2015 +0100 +++ b/develop/config/config.php Sun Mar 08 22:28:21 2015 +0100 @@ -2,11 +2,12 @@ // localhost $systemNAME = "interface"; -$mysql_database = "Gazetteer"; +$mysql_database = "Gazetteers"; $mysql_server = "localhost"; $mysql_user = "root"; -$mysql_password = "admin"; +$mysql_password = "root"; -$system_root_url = "http://localgazetteers-dev/extraction-interface/develop"; +$system_root_url = "http://localhost:1080/extraction-interface/develop"; + ?>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/develop/data/parsing_files/1.txt Sun Mar 08 22:28:21 2015 +0100 @@ -0,0 +1,49 @@ +<?xml version="1.0" encoding="UTF-8"?> +<text> + <topic>2</topic> + <book category="localgazetteer"> + <title>南陵縣志</title> + <author>qwer</author> + <year>1999</year> + <pagenumber>333</pagenumber> + </book> + + <taglist> + <id>2</id> + <name>人名</name> + <tag>person</tag> + <color>rgb(8, 148, 255)</color> + </taglist> + <taglist> + <id>29</id> + <name>入仕時間</name> + <tag>entry_time</tag> + <color>rgb(174, 179, 39)</color> + </taglist> + <taglist> + <id>31</id> + <name>物產名稱</name> + <tag>product_name</tag> + <color>rgb(227, 63, 227)</color> + </taglist> + + <text_content> + + 【102】 契本工課鈔肆貫 +物産 按淳属嚴郡爲 +〈古揚州之境〉而其物産之見於經 者曰陽鳥攸居而已曰篠簜既敷而已曰厥草惟 夭厥木惟喬而已固今日諸郡縣之所同而非止 嚴陵属縣爲然也謹擇其有資於民生服食噐用 +者載焉 +<shu>〈糓〉</shu>〈糓〉 秔 +<shu>〈糓〉</shu>有 +<shu>〈糓〉</shu><product_name>早稻</product_name> +<shu>〈糓〉</shu><product_name>晚稻</product_name> +<shu>〈糓〉</shu><product_name>紅稻</product_name> +<shu>〈糓〉</shu><product_name>白稻</product_name> + +<shu>〈糯〉</shu>〈糯〉有 +<shu>〈糯〉</shu><product_name>秋糯</product_name> +<shu>〈糯〉</shu><product_name>晩糯</product_name> + + + </text_content> +</text> \ No newline at end of file
--- a/develop/models/extractapp.php Fri Mar 06 17:56:48 2015 +0100 +++ b/develop/models/extractapp.php Sun Mar 08 22:28:21 2015 +0100 @@ -8,7 +8,8 @@ return array("Index Value 1", "Value 2", "Value 3"); } - protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0; + protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, + $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0, $taglist_infile = "", $book_meta = ""; public $messages = ""; private function Initialize($_urlvalues) { @@ -193,26 +194,49 @@ */ + private function Taglist_infileUpToDate($taglistArray) { + // compare $this->taglist_infile is the same as $taglistArray + $taglist_infile = $this->taglist_infile; + if (count($taglist_infile) != count($taglistArray)) { + return false; + } + foreach ($taglistArray as $row_indb) { + foreach ($taglist_infile as $row) { + //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) + if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { + + } else { + return false; + } + } + } + return true; + } // === for tagging === public function StartTagging() { - // $this->Initialize($urlvalues); - $section_id = $this->section_id; $stringInput = $this->lg_text; - + + $data = array(); // data array to be passed to view //$taglistArray = $this->GetTaglistArray(); //for GetTaglistByTopicID: $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); + + // TODO: check if taglist_infile is up-to-date + if ( !$this->Taglist_infileUpToDate($taglistArray)) { + $data['taglist_infile'] = $this->taglist_infile; + } + + // topic list $topiclistArray = $this->GetTopiclistArray(); $wordlistArray = $this->GetWordlistArray(); - $data = array(); $data['stringInput'] = $stringInput; $data['taglistArray'] = $taglistArray; $data['wordlistArray'] = $wordlistArray; @@ -1119,6 +1143,31 @@ return $data; } + private function 
ParseMetaData($filename) { + $text = file_get_contents($filename); + $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); + + $this->topic = $xml->topic; // set topic id + + // get taglist in file + $taglist_infile = $xml->taglist; + $taglistArray = array(); + foreach ($taglist_infile as $row) { + array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color)); + } + $this->taglist_infile = $taglistArray; + + // TODO: get meta data of book + $this->book_meta = $xml->book; + + // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; + // --- detect if the taglist set is up-to-date or not --- + + + $contentString = $xml->text_content->asXML(); + + return $contentString; + } private function GetSectionContent() { $section_id = $this->GetSectionId(); $section_info = $this->GetSectionInfo(); @@ -1130,8 +1179,10 @@ $contentString=""; $data_path = $this->GetDataPath(); if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { - $contentString=file_get_contents($data_path."parsing_files/".$section_id.".txt"); - $stringInput = $contentString; + $filename = $data_path."parsing_files/".$section_id.".txt"; + // --- parsing meta data + $stringInput = $this->ParseMetaData($filename); + // ---- // if the text is from file system $stringInput = preg_replace("/ /u", "○", $stringInput);
--- a/develop/views/Extractapp/TaggingText.php Fri Mar 06 17:56:48 2015 +0100 +++ b/develop/views/Extractapp/TaggingText.php Sun Mar 08 22:28:21 2015 +0100 @@ -11,6 +11,7 @@ $info = $viewmodel['info']; $messages = $viewmodel['messages']; +$taglist_infile = $viewmodel['taglist_infile']; ?> @@ -61,7 +62,7 @@ if (_GET['id']) { var info = JSON.parse('<?php echo json_encode($info) ?>'); - var redirectUrl = "http://localhost:1080/localmonographs/develop/Extractapp/TaggingText"; + var redirectUrl = "http://localhost:1080/extraction-interface/develop/Extractapp/TaggingText"; var section_id = info['section_id']; var form = $('<form action="' + redirectUrl + '" method="post">' + @@ -71,6 +72,18 @@ $(form).submit(); } // ==== +// --- check if taglist in file is up-to-date --- +var check_taglist = JSON.parse('<?php echo json_encode($taglist_infile) ?>'); +if (check_taglist) { + // TODO: pop up to ask if load the latest taglist + var retVal = confirm("The tag list in the file is not up-to-date. \nDo you want to load file with new tag list?"); + if( retVal == true ){ + <?php $$taglistArray = $taglist_infile; ?> + alert("Load with new tag list."); + }else{ + alert("You are using the old version of tag list."); + } +}