Mercurial > hg > extraction-interface
changeset 40:2e938dc046db extractapp
load, save XML file with topic, etc.
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 10 Mar 2015 13:46:16 +0100 |
parents | 8347776a44fc |
children | 533a6c39c128 |
files | develop/config/config.php develop/data/parsing_files/1.txt develop/models/extractapp.php develop/views/Extractapp/TaggingText.php |
diffstat | 4 files changed, 235 insertions(+), 99 deletions(-) [+] |
line wrap: on
line diff
--- a/develop/config/config.php Sun Mar 08 22:28:21 2015 +0100 +++ b/develop/config/config.php Tue Mar 10 13:46:16 2015 +0100 @@ -1,13 +1,24 @@ <?php -// localhost -$systemNAME = "interface"; -$mysql_database = "Gazetteers"; -$mysql_server = "localhost"; -$mysql_user = "root"; -$mysql_password = "root"; +$at_local = false; -$system_root_url = "http://localhost:1080/extraction-interface/develop"; +if (!$at_local) { + // host at localgazetteers-dev server + $systemNAME = "interface"; + $mysql_database = "Gazetteer"; + $mysql_server = "localhost"; + $mysql_user = "root"; + $mysql_password = "admin"; + $system_root_url = "http://localgazetteers-dev/extraction-interface/develop"; +} else { + // localhost + $systemNAME = "interface"; + $mysql_database = "Gazetteers"; + $mysql_server = "localhost"; + $mysql_user = "root"; + $mysql_password = "root"; + $system_root_url = "http://localhost:1080/extraction-interface/develop"; +} ?>
--- a/develop/data/parsing_files/1.txt Sun Mar 08 22:28:21 2015 +0100 +++ b/develop/data/parsing_files/1.txt Tue Mar 10 13:46:16 2015 +0100 @@ -1,34 +1,43 @@ <?xml version="1.0" encoding="UTF-8"?> <text> - <topic>2</topic> - <book category="localgazetteer"> - <title>南陵縣志</title> - <author>qwer</author> - <year>1999</year> - <pagenumber>333</pagenumber> - </book> - - <taglist> - <id>2</id> - <name>人名</name> - <tag>person</tag> - <color>rgb(8, 148, 255)</color> - </taglist> - <taglist> - <id>29</id> - <name>入仕時間</name> - <tag>entry_time</tag> - <color>rgb(174, 179, 39)</color> - </taglist> - <taglist> - <id>31</id> - <name>物產名稱</name> - <tag>product_name</tag> - <color>rgb(227, 63, 227)</color> - </taglist> - - <text_content> - +<topic>2</topic> +<book> +<title>南陵縣志</title> +<author>qwer</author> +<year>1999</year> +<pagenumber>333</pagenumber> +</book> +<tagitem> +<id>31</id> +<name>物產名稱</name> +<tag>product_name</tag> +<color>rgb(227, 63, 227)</color> +</tagitem> +<tagitem> +<id>32</id> +<name>屬</name> +<tag>shu</tag> +<color>rgb(143, 14, 143)</color> +</tagitem> +<tagitem> +<id>17</id> +<name>無意義</name> +<tag>null</tag> +<color>rgb(219, 219, 219)</color> +</tagitem> +<tagitem> +<id>34</id> +<name>物產產地</name> +<tag>product_place</tag> +<color>rgb(7, 64, 209)</color> +</tagitem> +<tagitem> +<id>26</id> +<name>別名</name> +<tag>othername</tag> +<color>rgb(141, 212, 224)</color> +</tagitem> +<text_content> 【102】 契本工課鈔肆貫 物産 按淳属嚴郡爲 〈古揚州之境〉而其物産之見於經 者曰陽鳥攸居而已曰篠簜既敷而已曰厥草惟 夭厥木惟喬而已固今日諸郡縣之所同而非止 嚴陵属縣爲然也謹擇其有資於民生服食噐用 @@ -43,7 +52,5 @@ <shu>〈糯〉</shu>〈糯〉有 <shu>〈糯〉</shu><product_name>秋糯</product_name> <shu>〈糯〉</shu><product_name>晩糯</product_name> - - - </text_content> +</text_content> </text> \ No newline at end of file
--- a/develop/models/extractapp.php Sun Mar 08 22:28:21 2015 +0100 +++ b/develop/models/extractapp.php Tue Mar 10 13:46:16 2015 +0100 @@ -18,13 +18,6 @@ } public function GetTextFromFileId($_postdata) { - /* - if ($this->current_fileId != 0) { - $this->file_id = $this->current_fileId; - } else { - $this->file_id = $_postdata['fileId']; - } - */ $this->file_id = $_postdata['fileId']; $branch_id = $_postdata['branchId']; $section_id = $_postdata['sectionId']; @@ -33,15 +26,18 @@ $this->user_id = $_postdata['userId']; $this->section_id = $section_id; - $this->section_name = $_postdata['sectionName']; $this->book_id = $_postdata['bookId']; $this->book_name = $_postdata['bookName']; - // get from URL with file_id $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; - $lg_text = file_get_contents($lg_text_url); + //$lg_text = file_get_contents($lg_text_url); + // --- parsing meta data + $lg_text = $this->ParseMetaData($lg_text_url); + + // ---- + $stringInput = $lg_text; $stringInput = preg_replace("/ /u", "○", $stringInput); @@ -54,7 +50,6 @@ } public function GetTextFromSectionId($_postdata) { - $section_id = $_postdata['sectionId']; $this->section_id = $section_id; $this->user_id = $_postdata['userId']; @@ -65,7 +60,6 @@ // get from URL with file_id $lg_text_url = $this->get_text_from_sectionId_url.$section_id; - $lg_text = file_get_contents($lg_text_url); $stringInput = $lg_text; @@ -118,8 +112,12 @@ if ($_postdata['currentFileId']) { $this->current_fileId = $_postdata['currentFileId']; } - - + if ($_postdata['taglistArray']) { + $this->taglist_infile = json_decode($_postdata['taglistArray']); + } + if ($_postdata['book_meta']) { + $this->book_meta = json_decode($_postdata['book_meta']); + } } public function InitData($_postdata) { @@ -194,6 +192,7 @@ */ + // TODO: comparison not correct private function Taglist_infileUpToDate($taglistArray) { // compare $this->taglist_infile is the same as $taglistArray $taglist_infile = $this->taglist_infile; @@ -202,14 
+201,18 @@ } foreach ($taglistArray as $row_indb) { + $cnt = 0; foreach ($taglist_infile as $row) { //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { } else { - return false; + $cnt ++; } } + if ($cnt == count($row)) { + return false; + } } return true; } @@ -225,11 +228,18 @@ //for GetTaglistByTopicID: $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); + $data['taglist_infile'] = $this->taglist_infile; // TODO: check if taglist_infile is up-to-date + /* if ( !$this->Taglist_infileUpToDate($taglistArray)) { $data['taglist_infile'] = $this->taglist_infile; - } + } else { + $data['taglist_infile'] = ""; + } + */ + // book_meta + $data['book_meta'] = $this->book_meta; // topic list $topiclistArray = $this->GetTopiclistArray(); @@ -338,6 +348,38 @@ } + private function AppendMetaData($text_content) { + $text = '<?xml version="1.0" encoding="UTF-8"?>'; + $text .= "\n<text>\n"; + // topic + $text .= "<topic>".$this->topic."</topic>\n"; + // book meta data + $book = $this->book_meta; + foreach ($book as $b) { + $text .= "<book>\n"; + $text .= "<title>".$b[0]."</title>\n"; + $text .= "<author>".$b[1]."</author>\n"; + $text .= "<year>".$b[2]."</year>\n"; + $text .= "<pagenumber>".$b[3]."</pagenumber>\n"; + $text .= "</book>\n"; + } + // taglist + $taglist = $this->taglist_infile; + foreach ($taglist as $tagitem) { + $text .= "<tagitem>\n"; + $text .= "<id>".$tagitem[0]."</id>\n"; + $text .= "<name>".$tagitem[1]."</name>\n"; + $text .= "<tag>".$tagitem[2]."</tag>\n"; + $text .= "<color>".$tagitem[3]."</color>\n"; + $text .= "</tagitem>\n"; + } + + // text_content + $text .= $text_content; + $text .= "\n</text>"; + + return $text; + } public function SaveFullTextToLGService($_postdata) { // save tagged text (full text) by Jorge's API to lg service @@ -355,12 +397,20 @@ } else { $require = $_postdata['text']; } - + $require = 
preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require); $require = preg_replace('/&/u', "&", $require); $require = preg_replace("/○/u", " ", $require); $require = preg_replace("/<br>/u", "\n", $require); - $require = preg_replace("/<br>/u", "\n", $require); + //$require = preg_replace("/<br>/u", "\n", $require); + + // TODO: append metadata at the beginning of file + if ($_postdata['branchId'] == 0) { + // -- new branch case + $require = "<text_content>\n".$require."</text_content>\n"; + } + $require = $this->AppendMetaData($require); + //saving in my local machine in developing phrase file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); } @@ -409,8 +459,9 @@ )); // execute the request - $output = curl_exec($ch); - + // **** commended to DEBUG *** + //$output = curl_exec($ch); + // ***** // output the profile information - includes the header //echo($output) . PHP_EOL; @@ -1147,24 +1198,34 @@ $text = file_get_contents($filename); $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); - $this->topic = $xml->topic; // set topic id + $this->topic = (string)$xml->topic; // set topic id // get taglist in file - $taglist_infile = $xml->taglist; + $taglist_infile = $xml->tagitem; $taglistArray = array(); foreach ($taglist_infile as $row) { - array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color)); + array_push($taglistArray, array((string)$row->id,(string)$row->name,(string)$row->tag,(string)$row->color )); + } + if ($taglistArray) { + $this->taglist_infile = $taglistArray; } - $this->taglist_infile = $taglistArray; - // TODO: get meta data of book - $this->book_meta = $xml->book; + // get book meta data + $book_meta = $xml->book; + $book_metaArray = array(); + foreach ($book_meta as $row) { + //array_push($book_metaArray, array('title'=>(string)$row->title,'author'=>(string)$row->author,'year'=>(string)$row->year,'pagenumber'=>(string)$row->pagenumber )); + 
array_push($book_metaArray, array((string)$row->title,(string)$row->author,(string)$row->year,(string)$row->pagenumber )); + } + if ($book_metaArray) { + $this->book_meta = $book_metaArray; + } // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; // --- detect if the taglist set is up-to-date or not --- - - - $contentString = $xml->text_content->asXML(); + $contentString = (string)$xml->text_content->asXML(); + //$removed_str = array("<text_content>","</text_content>"); + //$new_contentString = str_replace($removed_str, "", $contentString); return $contentString; } @@ -1180,7 +1241,8 @@ $data_path = $this->GetDataPath(); if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { $filename = $data_path."parsing_files/".$section_id.".txt"; - // --- parsing meta data + + // --- parsing meta data $stringInput = $this->ParseMetaData($filename); // ----
--- a/develop/views/Extractapp/TaggingText.php Sun Mar 08 22:28:21 2015 +0100 +++ b/develop/views/Extractapp/TaggingText.php Tue Mar 10 13:46:16 2015 +0100 @@ -12,6 +12,8 @@ $messages = $viewmodel['messages']; $taglist_infile = $viewmodel['taglist_infile']; +$book_meta = $viewmodel['book_meta']; + ?> @@ -38,13 +40,12 @@ font-size:21px; } - <?php // color on the tags foreach ( $taglistArray as $taglistValue ) { - echo $taglistValue[2]."\n{\ncolor:".$taglistValue[3]."\n}\n"; - - echo ".span_".$taglistValue[2]."\n{\nbackground-color:".$taglistValue[3]."\n}\n"; + echo $taglistValue[2]."\n{\ncolor:".$taglistValue[3]."\n}\n"; + + echo ".span_".$taglistValue[2]."\n{\nbackground-color:".$taglistValue[3]."\n}\n"; } ?> @@ -57,36 +58,28 @@ <script type="text/javascript"> -// === This is only for developing on local machine ==== -var _GET = JSON.parse('<?php echo json_encode($_GET) ?>'); -if (_GET['id']) { - - var info = JSON.parse('<?php echo json_encode($info) ?>'); - var redirectUrl = "http://localhost:1080/extraction-interface/develop/Extractapp/TaggingText"; - var section_id = info['section_id']; +// not use this for now +function _showTagColor(up_to_date) { + var taglistArray = ""; + if (up_to_date) { + taglistArray = JSON.parse('<?php echo json_encode($taglistArray) ?>'); + } else { + taglistArray = JSON.parse('<?php echo json_encode($taglist_infile) ?>'); + } + - var form = $('<form action="' + redirectUrl + '" method="post">' + - '<input type="hidden" name="sectionId" value="'+section_id+'" />' + - '</form>'); - $('body').append(form); - $(form).submit(); -} -// ==== -// --- check if taglist in file is up-to-date --- -var check_taglist = JSON.parse('<?php echo json_encode($taglist_infile) ?>'); -if (check_taglist) { - // TODO: pop up to ask if load the latest taglist - var retVal = confirm("The tag list in the file is not up-to-date. 
\nDo you want to load file with new tag list?"); - if( retVal == true ){ - <?php $$taglistArray = $taglist_infile; ?> - alert("Load with new tag list."); - }else{ - alert("You are using the old version of tag list."); + for (var i = 0; i < taglistArray.length; i++) { + var taglistValue = taglistArray[i]; + + $(taglistValue[2]).css('color', taglistValue[3]); + /*var element = document.querySelectorAll(taglistValue[2]); + for (var j = 0; j < element.length; j++) { + element[j].style.color = taglistValue[3]; + } + */ } } - - // TODO: Popup to proceeding the saving situations function handleFileVersionConflict() { var info = JSON.parse('<?php echo json_encode($info) ?>'); @@ -206,6 +199,42 @@ el.stop().animate({'top':finaldestination},0); }); + // --- handle old version taglist --- + var _GET = JSON.parse('<?php echo json_encode($_GET) ?>'); + if (_GET['id']) { + // === This case should only be possible for developing on local machine ==== + var info = JSON.parse('<?php echo json_encode($info) ?>'); + var redirectUrl = "http://localhost:1080/extraction-interface/develop/Extractapp/TaggingText"; + var section_id = info['section_id']; + + var form = $('<form action="' + redirectUrl + '" method="post">' + + '<input type="hidden" name="sectionId" value="'+section_id+'" />' + + '</form>'); + $('body').append(form); + $(form).submit(); + } else { + // --- TODO: check if taglist in file is up-to-date --- + /* + var taglist_infile = JSON.parse('<?php echo json_encode($taglist_infile) ?>'); + if (taglist_infile != "") { + // TODO: pop up to ask if load the latest taglist + var retVal = confirm("Tag list in file is not up-to-date. \nDo you want to load file with new tag list?"); + if( retVal == true ){ + // use $taglisArray + alert("Load file with new tag list."); + }else{ + // use $taglist_infile as $taglistArray + <?php + // $taglistArray = $taglist_infile; + ?> + alert("You are viewing the file with old version of tag list. 
Please use the up-to-date taglist."); + } + //_showTagColor(retVal); + <?php $taglist_infile = ""; ?> + } + */ + } + handleFileVersionConflict(); @@ -260,6 +289,10 @@ if ( container.innerHTML.indexOf( "br" ) != -1 ) { var newselect = document.createElement("select"); newselect.id = "TitletagType"; + + + console.log(JSON.parse('<?php echo json_encode($taglistArray) ?>')); + <?php foreach ( $taglistArray as $taglistValue ) { echo "newselect.innerHTML += \"<option value='".$taglistValue[2]."'>".$taglistValue[1]."</option>\";\n"; @@ -796,6 +829,26 @@ hiddenField.setAttribute("value", text); form.appendChild(hiddenField); + // pass taglist + var hiddenField = document.createElement("input"); + hiddenField.setAttribute("name", "taglistArray"); + var taglistObj = JSON.parse('<?php echo json_encode($taglist_infile) ?>'); + if (!taglistObj) { + taglistObj = JSON.parse('<?php echo json_encode($taglistArray) ?>'); + } + taglistArray = JSON.stringify(taglistObj); + hiddenField.setAttribute("value", taglistArray); + form.appendChild(hiddenField); + + // pass book_meta + var hiddenField = document.createElement("input"); + hiddenField.setAttribute("name", "book_meta"); + var book_metaObj = JSON.parse('<?php echo json_encode($book_meta) ?>'); + book_metaArray = JSON.stringify(book_metaObj); + hiddenField.setAttribute("value", book_metaArray); + form.appendChild(hiddenField); + + var info = JSON.parse( '<?php echo json_encode($info) ?>'); if (info) { @@ -990,7 +1043,10 @@ <!--<button onclick="cleanUpTextArea()" style="height: 30px; width: 220px">Reform the text</button></br>--> <!-- save text in local. 
replaced by saveTextToLGService, which is also saving on copy in local in development stage --> + <!-- <button onclick="saveText(<?php echo $section_id; ?>)" style="height: 30px; width: 220px">Save the text</button></br> + --> + <button onclick="editText()" id="editTextId" style="height: 30px; width: 220px">Edit the text</button></br> </br> <button onclick="window.open('./EditWordlist')" style="height: 30px; width: 220px">Manage Word List</button></br>