Mercurial > hg > extraction-interface
comparison develop/models/extractapp.php @ 39:8347776a44fc extractapp
embedded topic, taglist, book metadata in file
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Sun, 08 Mar 2015 22:28:21 +0100 |
| parents | 4b3da4802998 |
| children | 2e938dc046db |
comparison
equal
deleted
inserted
replaced
| 38:4b3da4802998 | 39:8347776a44fc |
|---|---|
| 6 | 6 |
| 7 public function Index() { | 7 public function Index() { |
| 8 return array("Index Value 1", "Value 2", "Value 3"); | 8 return array("Index Value 1", "Value 2", "Value 3"); |
| 9 } | 9 } |
| 10 | 10 |
| 11 protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0; | 11 protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, |
| 12 $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0, $taglist_infile = "", $book_meta = ""; | |
| 12 public $messages = ""; | 13 public $messages = ""; |
| 13 | 14 |
| 14 private function Initialize($_urlvalues) { | 15 private function Initialize($_urlvalues) { |
| 15 $this->SetSectionId($_urlvalues); | 16 $this->SetSectionId($_urlvalues); |
| 16 | 17 |
| 191 | 192 |
| 192 } | 193 } |
| 193 */ | 194 */ |
| 194 | 195 |
| 195 | 196 |
| 196 | 197 private function Taglist_infileUpToDate($taglistArray) { |
| 198 // compare $this->taglist_infile is the same as $taglistArray | |
| 199 $taglist_infile = $this->taglist_infile; | |
| 200 if (count($taglist_infile) != count($taglistArray)) { | |
| 201 return false; | |
| 202 } | |
| 203 | |
| 204 foreach ($taglistArray as $row_indb) { | |
| 205 foreach ($taglist_infile as $row) { | |
| 206 //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) | |
| 207 if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { | |
| 208 | |
| 209 } else { | |
| 210 return false; | |
| 211 } | |
| 212 } | |
| 213 } | |
| 214 return true; | |
| 215 } | |
| 197 // === for tagging === | 216 // === for tagging === |
| 198 public function StartTagging() { | 217 public function StartTagging() { |
| 199 | 218 |
| 200 // $this->Initialize($urlvalues); | |
| 201 | |
| 202 $section_id = $this->section_id; | 219 $section_id = $this->section_id; |
| 203 $stringInput = $this->lg_text; | 220 $stringInput = $this->lg_text; |
| 204 | 221 |
| 222 $data = array(); // data array to be passed to view | |
| 205 | 223 |
| 206 //$taglistArray = $this->GetTaglistArray(); | 224 //$taglistArray = $this->GetTaglistArray(); |
| 207 //for GetTaglistByTopicID: | 225 //for GetTaglistByTopicID: |
| 208 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); | 226 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); |
| 227 | |
| 228 // TODO: check if taglist_infile is up-to-date | |
| 229 if ( !$this->Taglist_infileUpToDate($taglistArray)) { | |
| 230 $data['taglist_infile'] = $this->taglist_infile; | |
| 231 } | |
| 232 | |
| 233 | |
| 209 // topic list | 234 // topic list |
| 210 $topiclistArray = $this->GetTopiclistArray(); | 235 $topiclistArray = $this->GetTopiclistArray(); |
| 211 | 236 |
| 212 | 237 |
| 213 $wordlistArray = $this->GetWordlistArray(); | 238 $wordlistArray = $this->GetWordlistArray(); |
| 214 | 239 |
| 215 $data = array(); | |
| 216 $data['stringInput'] = $stringInput; | 240 $data['stringInput'] = $stringInput; |
| 217 $data['taglistArray'] = $taglistArray; | 241 $data['taglistArray'] = $taglistArray; |
| 218 $data['wordlistArray'] = $wordlistArray; | 242 $data['wordlistArray'] = $wordlistArray; |
| 219 $data['section_id'] = $section_id; | 243 $data['section_id'] = $section_id; |
| 220 $data['topiclistArray'] = $topiclistArray; | 244 $data['topiclistArray'] = $topiclistArray; |
| 1117 | 1141 |
| 1118 | 1142 |
| 1119 return $data; | 1143 return $data; |
| 1120 } | 1144 } |
| 1121 | 1145 |
| 1146 private function ParseMetaData($filename) { | |
| 1147 $text = file_get_contents($filename); | |
| 1148 $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); | |
| 1149 | |
| 1150 $this->topic = $xml->topic; // set topic id | |
| 1151 | |
| 1152 // get taglist in file | |
| 1153 $taglist_infile = $xml->taglist; | |
| 1154 $taglistArray = array(); | |
| 1155 foreach ($taglist_infile as $row) { | |
| 1156 array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color)); | |
| 1157 } | |
| 1158 $this->taglist_infile = $taglistArray; | |
| 1159 | |
| 1160 // TODO: get meta data of book | |
| 1161 $this->book_meta = $xml->book; | |
| 1162 | |
| 1163 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; | |
| 1164 // --- detect if the taglist set is up-to-date or not --- | |
| 1165 | |
| 1166 | |
| 1167 $contentString = $xml->text_content->asXML(); | |
| 1168 | |
| 1169 return $contentString; | |
| 1170 } | |
| 1122 private function GetSectionContent() { | 1171 private function GetSectionContent() { |
| 1123 $section_id = $this->GetSectionId(); | 1172 $section_id = $this->GetSectionId(); |
| 1124 $section_info = $this->GetSectionInfo(); | 1173 $section_info = $this->GetSectionInfo(); |
| 1125 | 1174 |
| 1126 $bookId = $section_info['bookId']; | 1175 $bookId = $section_info['bookId']; |
| 1128 $endPage = $section_info['endPage']; | 1177 $endPage = $section_info['endPage']; |
| 1129 | 1178 |
| 1130 $contentString=""; | 1179 $contentString=""; |
| 1131 $data_path = $this->GetDataPath(); | 1180 $data_path = $this->GetDataPath(); |
| 1132 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { | 1181 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { |
| 1133 $contentString=file_get_contents($data_path."parsing_files/".$section_id.".txt"); | 1182 $filename = $data_path."parsing_files/".$section_id.".txt"; |
| 1134 $stringInput = $contentString; | 1183 // --- parsing meta data |
| 1184 $stringInput = $this->ParseMetaData($filename); | |
| 1185 // ---- | |
| 1135 | 1186 |
| 1136 // if the text is from file system | 1187 // if the text is from file system |
| 1137 $stringInput = preg_replace("/ /u", "○", $stringInput); | 1188 $stringInput = preg_replace("/ /u", "○", $stringInput); |
| 1138 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 1189 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
| 1139 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); | 1190 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); |
