Mercurial > hg > extraction-interface
comparison develop/models/extractapp.php @ 39:8347776a44fc extractapp
embedded topic, taglist,book metadata in file
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Sun, 08 Mar 2015 22:28:21 +0100 |
parents | 4b3da4802998 |
children | 2e938dc046db |
comparison
equal
deleted
inserted
replaced
38:4b3da4802998 | 39:8347776a44fc |
---|---|
6 | 6 |
7 public function Index() { | 7 public function Index() { |
8 return array("Index Value 1", "Value 2", "Value 3"); | 8 return array("Index Value 1", "Value 2", "Value 3"); |
9 } | 9 } |
10 | 10 |
11 protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0; | 11 protected $section_id = 0, $data_path, $file_id = 0, $current_fileId=0, |
12 $branch_id = 0, $user_id = 0, $lg_text = "", $topic = 0, $taglist_infile = "", $book_meta = ""; | |
12 public $messages = ""; | 13 public $messages = ""; |
13 | 14 |
14 private function Initialize($_urlvalues) { | 15 private function Initialize($_urlvalues) { |
15 $this->SetSectionId($_urlvalues); | 16 $this->SetSectionId($_urlvalues); |
16 | 17 |
191 | 192 |
192 } | 193 } |
193 */ | 194 */ |
194 | 195 |
195 | 196 |
196 | 197 private function Taglist_infileUpToDate($taglistArray) { |
198 // compare $this->taglist_infile is the same as $taglistArray | |
199 $taglist_infile = $this->taglist_infile; | |
200 if (count($taglist_infile) != count($taglistArray)) { | |
201 return false; | |
202 } | |
203 | |
204 foreach ($taglistArray as $row_indb) { | |
205 foreach ($taglist_infile as $row) { | |
206 //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) | |
207 if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { | |
208 | |
209 } else { | |
210 return false; | |
211 } | |
212 } | |
213 } | |
214 return true; | |
215 } | |
197 // === for tagging === | 216 // === for tagging === |
198 public function StartTagging() { | 217 public function StartTagging() { |
199 | 218 |
200 // $this->Initialize($urlvalues); | |
201 | |
202 $section_id = $this->section_id; | 219 $section_id = $this->section_id; |
203 $stringInput = $this->lg_text; | 220 $stringInput = $this->lg_text; |
204 | 221 |
222 $data = array(); // data array to be passed to view | |
205 | 223 |
206 //$taglistArray = $this->GetTaglistArray(); | 224 //$taglistArray = $this->GetTaglistArray(); |
207 //for GetTaglistByTopicID: | 225 //for GetTaglistByTopicID: |
208 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); | 226 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); |
227 | |
228 // TODO: check if taglist_infile is up-to-date | |
229 if ( !$this->Taglist_infileUpToDate($taglistArray)) { | |
230 $data['taglist_infile'] = $this->taglist_infile; | |
231 } | |
232 | |
233 | |
209 // topic list | 234 // topic list |
210 $topiclistArray = $this->GetTopiclistArray(); | 235 $topiclistArray = $this->GetTopiclistArray(); |
211 | 236 |
212 | 237 |
213 $wordlistArray = $this->GetWordlistArray(); | 238 $wordlistArray = $this->GetWordlistArray(); |
214 | 239 |
215 $data = array(); | |
216 $data['stringInput'] = $stringInput; | 240 $data['stringInput'] = $stringInput; |
217 $data['taglistArray'] = $taglistArray; | 241 $data['taglistArray'] = $taglistArray; |
218 $data['wordlistArray'] = $wordlistArray; | 242 $data['wordlistArray'] = $wordlistArray; |
219 $data['section_id'] = $section_id; | 243 $data['section_id'] = $section_id; |
220 $data['topiclistArray'] = $topiclistArray; | 244 $data['topiclistArray'] = $topiclistArray; |
1117 | 1141 |
1118 | 1142 |
1119 return $data; | 1143 return $data; |
1120 } | 1144 } |
1121 | 1145 |
1146 private function ParseMetaData($filename) { | |
1147 $text = file_get_contents($filename); | |
1148 $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); | |
1149 | |
1150 $this->topic = $xml->topic; // set topic id | |
1151 | |
1152 // get taglist in file | |
1153 $taglist_infile = $xml->taglist; | |
1154 $taglistArray = array(); | |
1155 foreach ($taglist_infile as $row) { | |
1156 array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color)); | |
1157 } | |
1158 $this->taglist_infile = $taglistArray; | |
1159 | |
1160 // TODO: get meta data of book | |
1161 $this->book_meta = $xml->book; | |
1162 | |
1163 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; | |
1164 // --- detect if the taglist set is up-to-date or not --- | |
1165 | |
1166 | |
1167 $contentString = $xml->text_content->asXML(); | |
1168 | |
1169 return $contentString; | |
1170 } | |
1122 private function GetSectionContent() { | 1171 private function GetSectionContent() { |
1123 $section_id = $this->GetSectionId(); | 1172 $section_id = $this->GetSectionId(); |
1124 $section_info = $this->GetSectionInfo(); | 1173 $section_info = $this->GetSectionInfo(); |
1125 | 1174 |
1126 $bookId = $section_info['bookId']; | 1175 $bookId = $section_info['bookId']; |
1128 $endPage = $section_info['endPage']; | 1177 $endPage = $section_info['endPage']; |
1129 | 1178 |
1130 $contentString=""; | 1179 $contentString=""; |
1131 $data_path = $this->GetDataPath(); | 1180 $data_path = $this->GetDataPath(); |
1132 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { | 1181 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { |
1133 $contentString=file_get_contents($data_path."parsing_files/".$section_id.".txt"); | 1182 $filename = $data_path."parsing_files/".$section_id.".txt"; |
1134 $stringInput = $contentString; | 1183 // --- parsing meta data |
1184 $stringInput = $this->ParseMetaData($filename); | |
1185 // ---- | |
1135 | 1186 |
1136 // if the text is from file system | 1187 // if the text is from file system |
1137 $stringInput = preg_replace("/ /u", "○", $stringInput); | 1188 $stringInput = preg_replace("/ /u", "○", $stringInput); |
1138 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 1189 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
1139 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); | 1190 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); |